From 341c95cafc581f919094b09f3504674ab923f08d Mon Sep 17 00:00:00 2001 From: ahmedk <karim.ahmed@xfel.eu> Date: Tue, 5 Apr 2022 10:45:10 +0200 Subject: [PATCH] validate number of trains for each module individually --- .../Characterize_AGIPD_Gain_Darks_NBC.ipynb | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb index f3998ba55..aa2f8ea59 100644 --- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb +++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb @@ -65,7 +65,7 @@ "\n", "thresholds_gain_sigma = 5. # Gain separation sigma threshold\n", "max_trains = 0 # Maximum number of trains to use for processing dark. Set to 0 to process all available trains.\n", - "min_trains = 1 # Miniumum number of trains for processing dark. If raw folder has less than minimum trains processing is stopped.\n", + "min_trains = 1 # Minimum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.\n", "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells\n", "\n", "# This is used if modules is not specified:\n", @@ -209,26 +209,34 @@ "# Create out_folder if it doesn't exist.\n", "Path(out_folder).mkdir(parents=True, exist_ok=True)\n", "\n", - "max_trains_list = []\n", "file_sizes = []\n", "for run_dict in runs_dict.values():\n", - " missing_modules = []\n", + " missing_modules = [] # modules with no images within a run.\n", + " n_trains_list = [] # list of the number of trains for each module within a run.\n", " # This is important in case of no slurm parallelization over modules is done.\n", " # (e.g. 
running notebook interactively)\n", - " dc = run_dict[\"dc\"].select(\n", - " [(instrument_src.format(m), \"*\") for m in modules],\n", - " require_all=True\n", - " )\n", - " # validate that there are trains and that data sources are\n", - " # present for any of the selected modules.\n", - " n_trains = len(dc.train_ids)\n", - " if n_trains < min_trains:\n", - " print(f\"WARNING: {dc.files} have less than minimum trains: {min_trains}.\")\n", - " if n_trains == 0:\n", - " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", + " for m in modules:\n", + " # validate that there are trains for the selected modules and run.\n", + " dc = run_dict[\"dc\"].select(\n", + " instrument_src.format(m), \"*\", require_all=True)\n", + " n_trains = len(dc.train_ids)\n", + "\n", + " if n_trains == 0:\n", + " print(f\"WARNING: No images for module AGIPD{m:02d}.\")\n", + " missing_modules.append(m)\n", + "\n", + " # Raise a warning if the module has less trains than expected.\n", + " elif n_trains < min_trains:\n", + " print(f\"WARNING: {dc.files} for AGIPD{m:02d} have less than minimum trains: {min_trains}.\")\n", "\n", - " max_trains_list.append(n_trains)\n", - " file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n", + " n_trains_list.append(n_trains)\n", + "\n", + " file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n", + "\n", + " if max(n_trains_list) == 0:\n", + " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", + " elif max(n_trains_list) < min_trains:\n", + " raise ValueError(f\"{run_dict['number']} has less than minimum trains: {min_trains}\")\n", "\n", "# Update modules and karabo_da lists based on available modules to processes.\n", "modules = [m for m in modules if m not in missing_modules]\n", -- GitLab