From 341c95cafc581f919094b09f3504674ab923f08d Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Tue, 5 Apr 2022 10:45:10 +0200
Subject: [PATCH] validate number of trains for each module individually

---
 .../Characterize_AGIPD_Gain_Darks_NBC.ipynb   | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
index f3998ba55..aa2f8ea59 100644
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
@@ -65,7 +65,7 @@
     "\n",
     "thresholds_gain_sigma = 5.  # Gain separation sigma threshold\n",
     "max_trains = 0  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains.\n",
-    "min_trains = 1  # Miniumum number of trains for processing dark. If raw folder has less than minimum trains processing is stopped.\n",
+    "min_trains = 1  # Minimum number of trains for processing dark. If run folder has fewer than the minimum number of trains, processing is stopped.\n",
     "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells\n",
     "\n",
     "# This is used if modules is not specified:\n",
@@ -209,26 +209,34 @@
     "# Create out_folder if it doesn't exist.\n",
     "Path(out_folder).mkdir(parents=True, exist_ok=True)\n",
     "\n",
-    "max_trains_list = []\n",
     "file_sizes = []\n",
     "for run_dict in runs_dict.values():\n",
-    "    missing_modules = []\n",
+    "    missing_modules = []  # modules with no images within a run.\n",
+    "    n_trains_list = []   # list of the number of trains for each module within a run.\n",
     "    # This is important in case of no slurm parallelization over modules is done.\n",
     "    # (e.g. running notebook interactively)\n",
-    "    dc = run_dict[\"dc\"].select(\n",
-    "        [(instrument_src.format(m), \"*\") for m in modules],\n",
-    "        require_all=True\n",
-    "    )\n",
-    "    # validate that there are trains and that data sources are\n",
-    "    # present for any of the selected modules.\n",
-    "    n_trains = len(dc.train_ids)\n",
-    "    if n_trains < min_trains:\n",
-    "        print(f\"WARNING: {dc.files} have less than minimum trains: {min_trains}.\")\n",
-    "    if n_trains == 0:\n",
-    "        raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n",
+    "    for m in modules:\n",
+    "        # Validate that there are trains for this module in this run.\n",
+    "        dc = run_dict[\"dc\"].select(\n",
+    "            instrument_src.format(m), \"*\", require_all=True)\n",
+    "        n_trains = len(dc.train_ids)\n",
+    "\n",
+    "        if n_trains == 0:\n",
+    "            print(f\"WARNING: No images for module AGIPD{m:02d}.\")\n",
+    "            missing_modules.append(m)\n",
+    "\n",
+    "        # Print a warning if the module has fewer trains than min_trains.\n",
+    "        elif n_trains < min_trains:\n",
+    "            print(f\"WARNING: {dc.files} for AGIPD{m:02d} have less than minimum trains: {min_trains}.\")\n",
     "\n",
-    "    max_trains_list.append(n_trains)\n",
-    "    file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n",
+    "        n_trains_list.append(n_trains)\n",
+    "\n",
+    "        file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n",
+    "\n",
+    "    if max(n_trains_list) == 0:\n",
+    "        raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n",
+    "    elif max(n_trains_list) < min_trains:\n",
+    "        raise ValueError(f\"{run_dict['number']} has less than minimum trains: {min_trains}\")\n",
     "\n",
     "# Update modules and karabo_da lists based on available modules to processes.\n",
     "modules = [m for m in modules if m not in missing_modules]\n",
-- 
GitLab