diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb index 32068cbac86097dcbff72cd9a289cf202289eb7a..25abaf2133a14f5c9abb9ff74a1e1a617f21972a 100644 --- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb +++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb @@ -64,8 +64,8 @@ "thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels\n", "\n", "thresholds_gain_sigma = 5. # Gain separation sigma threshold\n", - "max_trains = 0 # Maximum number of trains to use for processing dark. Set to 0 to process all available trains.\n", - "min_trains = 1 # Miniumum number of trains for processing dark. If raw folder has less than minimum trains processing is stopped.\n", + "max_trains = 550 # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarily avoid memory issues.\n", + "min_trains = 1 # Minimum number of trains for processing dark. If the run folder has fewer trains than this minimum, processing is stopped.\n", "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells\n", "\n", "# This is used if modules is not specified:\n", @@ -209,25 +209,37 @@ "# Create out_folder if it doesn't exist.\n", "Path(out_folder).mkdir(parents=True, exist_ok=True)\n", "\n", - "max_trains_list = []\n", "file_sizes = []\n", "for run_dict in runs_dict.values():\n", - " missing_modules = []\n", + " missing_modules = [] # modules with no images within a run.\n", + " n_trains_list = [] # list of the number of trains for each module within a run.\n", " # This is important in case of no slurm parallelization over modules is done.\n", " # (e.g. 
running notebook interactively)\n", - " dc = run_dict[\"dc\"].select(\n", - " [(instrument_src.format(m), \"*\") for m in modules],\n", - " require_all=True\n", - " )\n", - " # validate that there are trains and that data sources are\n", - " # present for any of the selected modules.\n", - " n_trains = len(dc.train_ids)\n", + " for m in modules:\n", + " # validate that there are trains for the selected modules and run.\n", + " dc = run_dict[\"dc\"].select(\n", + " instrument_src.format(m), \"*\", require_all=True)\n", + " n_trains = len(dc.train_ids)\n", + "\n", + " if n_trains == 0:\n", + " print(f\"WARNING: No images for module AGIPD{m:02d}, run {run_dict['number']}.\")\n", + " missing_modules.append(m)\n", + " # Raise a warning if the module has less trains than expected.\n", + " elif n_trains < min_trains:\n", + " print(f\"WARNING: AGIPD{m:02d}, run {run_dict['number']} \"\n", + " f\"has trains less than minimum trains: {min_trains}.\")\n", + " else:\n", + " print(f\"Processing {min(max_trains, n_trains) if max_trains else n_trains} trains \"\n", + " f\"for AGIPD{m:02d}, run {run_dict['number']} \")\n", "\n", - " if n_trains == 0:\n", - " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", + " n_trains_list.append(n_trains)\n", "\n", - " max_trains_list.append(n_trains)\n", - " file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n", + " file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n", + "\n", + " if max(n_trains_list) == 0:\n", + " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", + " elif max(n_trains_list) < min_trains:\n", + " raise ValueError(f\"{run_dict['number']} has less than minimum trains: {min_trains}\")\n", "\n", "# Update modules and karabo_da lists based on available modules to processes.\n", "modules = [m for m in modules if m not in missing_modules]\n", @@ -436,30 +448,6 @@ " ]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "# Check if max_trains can be processed.\n", - "\n", - "# more relevant if running on multiple modules (i.e. within notebook)\n", - "# mem_cells * gains * n_constants * modules * agipd_[x,y]image_size * 2\n", - "av_mem = psutil.virtual_memory().available\n", - "possible_trains = av_mem // (352 * 3 * 3 * len(modules) * 131072 * 2)\n", - "if max_trains == 0:\n", - " max_trains = max(max_trains_list)\n", - "if max_trains > possible_trains:\n", - " max_trains = possible_trains\n", - " print(\n", - " f\"WARNING: available memory for processing is { av_mem / 1e9:.02f} GB.\"\n", - " f\" Modifing max_trains to process to {max_trains}\")\n", - "\n", - "for run_dict in runs_dict.values():\n", - " run_dict[\"dc\"] = run_dict[\"dc\"].select_trains(np.s_[:max_trains])" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -475,7 +463,7 @@ "metadata": {}, "outputs": [], "source": [ - "parallel_num_procs = min(12, len(modules)*3)\n", + "parallel_num_procs = min(6, len(modules)*3)\n", "parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs\n", "print(f\"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each\")\n", "\n", @@ -486,14 +474,11 @@ " # Select the corresponding module channel.\n", " instrument_src_mod = instrument_src.format(channel)\n", "\n", - " run_dc = runs_dict[\"dc\"]\n", + " run_dc = runs_dict[\"dc\"].select(instrument_src_mod, require_all=True)\n", + " if max_trains != 0:\n", + " run_dc = run_dc.select_trains(np.s_[:max_trains])\n", " gain_index = runs_dict[\"gain\"]\n", "\n", - " if run_dc[instrument_src_mod, \"image.data\"].shape[0] < min_trains:\n", - " print(\n", - " f\"WARNING: {run_dc.files} have less than \"\n", - " \"minimum trains: {min_trains}.\")\n", - "\n", " # Read module's image and cellId data.\n", " im = run_dc[instrument_src_mod, \"image.data\"].ndarray()\n", " cell_ids = np.squeeze(run_dc[instrument_src_mod, \"image.cellId\"].ndarray())\n", diff --git a/setup.py 
b/setup.py index f490a0e02c2ea7524702f2a1543c446b5081df7a..42847f5dc0d7cc35b539ac5e8b2bf9fb54103b1f 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,6 @@ install_requires = [ "pasha==0.1.1", "prettytable==0.7.2", "princess==0.5", - "psutil==5.9.0", "pypandoc==1.4", "python-dateutil==2.8.1", "pyyaml==5.3",