From 4fae716d13b8ff9a0f87ee500cebd8f949d8fef5 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Thu, 2 Dec 2021 15:43:35 +0100
Subject: [PATCH] Apply suggested refactors to map_seq_files and Jungfrau notebooks

---
 ...Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 35 ++++++--------
 ...rk_analysis_all_gains_burst_mode_NBC.ipynb |  2 +-
 src/cal_tools/tools.py                        | 47 +++++++++++--------
 3 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index 5e64c6ae0..bca077b80 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -153,17 +153,16 @@
     "print(f\"Processing a total of {num_seq_files} sequence files\")\n",
     "table = []\n",
     "fi = 0\n",
-    "if len(mapped_files) > 0: # create table\n",
-    "    for kda, sfiles in mapped_files.items():\n",
-    "        for k, f in enumerate(sfiles):\n",
-    "            if k == 0:\n",
-    "                table.append((fi, kda, k, f))\n",
-    "            else:\n",
-    "                table.append((fi, \"\", k,  f))\n",
-    "            fi += 1\n",
-    "    md = display(Latex(tabulate.tabulate(\n",
-    "        table, tablefmt='latex',\n",
-    "        headers=[\"#\", \"module\", \"# module\", \"file\"])))"
+    "for kda, sfiles in mapped_files.items():\n",
+    "    for k, f in enumerate(sfiles):\n",
+    "        if k == 0:\n",
+    "            table.append((fi, kda, k, f))\n",
+    "        else:\n",
+    "            table.append((fi, \"\", k,  f))\n",
+    "        fi += 1\n",
+    "md = display(Latex(tabulate.tabulate(\n",
+    "    table, tablefmt='latex',\n",
+    "    headers=[\"#\", \"module\", \"# module\", \"file\"])))"
    ]
   },
   {
@@ -318,7 +317,10 @@
     "\n",
     "    if 0 <= index < plt_images:\n",
     "        r_data[index, ...] = d[0, ...]\n",
-    "\n",
+    "        if memory_cells == 1:\n",
+    "            g_data[index, ...] = g\n",
+    "        else:\n",
+    "            g_data[index, ...] = g[1, ...]\n",
     "    # Select memory cells\n",
     "    \n",
     "    # TODO: This needs to be revisited.\n",
@@ -350,13 +352,6 @@
     "\n",
     "    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))\n",
     "\n",
-    "    # Store sample of data for plotting\n",
-    "    if 0 <= index < plt_images:\n",
-    "        if memory_cells == 1:\n",
-    "            g_data[index, ...] = g\n",
-    "        else:\n",
-    "            g_data[index, ...] = g[1, ...]\n",
-    "\n",
     "    data_corr[index, ...] = d\n",
     "    mask_corr[index, ...] = msk"
    ]
@@ -418,7 +413,7 @@
     "\n",
     "        sensor_size = dshape[1:]\n",
     "        # load number of data available, including trains with empty data.\n",
-    "        n_trains = seq_dc.get_data_counts(instrument_src_kda, \"data.adc\").shape[0]\n",
+    "        n_trains = len(seq_dc.train_ids)\n",
     "\n",
     "        n_imgs = dshape[0]\n",
     "\n",
diff --git a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
index e24b0cd43..8391912ab 100644
--- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
@@ -217,7 +217,7 @@
     "        # load shape of data for memory cells, and detector size (imgs, cells, x, y)\n",
     "        n_imgs = run_dc[instrument_src, \"data.adc\"].shape[0]\n",
     "        # load number of data available, including trains with empty data.\n",
-    "        n_trains = run_dc.get_data_counts(instrument_src, \"data.adc\").shape[0]\n",
+    "        n_trains = len(run_dc.train_ids)\n",
     "        instr_dc = run_dc.select(instrument_src, require_all=True)\n",
     "        if n_trains-n_imgs != 0:\n",
     "            print(\n",
diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py
index 0efce13d7..4613e111f 100644
--- a/src/cal_tools/tools.py
+++ b/src/cal_tools/tools.py
@@ -61,29 +61,38 @@ def run_prop_seq_from_path(filename):
 def map_seq_files(
     run_dc: "extra_data.DataCollection",
     karabo_id: str,
-    karabo_da: List[str],
-    sequences: List[int],
+    karabo_das: List[str],
+    sequences: Optional[List[int]] = None,
 ) -> Tuple[dict, int]:
-    """
-    Using a DataCollection from extra-data collect
+
+    """Use an extra-data DataCollection to map the available
+    sequence files to their karabo_das.
+
+    Returns:
+        Dict: maps each karabo_da to the list of its sequence file paths.
+        Int: total number of sequence files to process over all karabo_das.
     """
-    num_seq_files = 0
-    mapped_files = dict()
-    sequence_files = [
-        Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
-    for kda in karabo_da:
-        if sequences != [-1]:
-            seq_files = [
-                f for f in sequence_files if any(
-                    f.match(f"*-{kda}-S{s:05d}.h5") for s in sequences)]
-        else:
-            seq_files = [
-                f for f in sequence_files if f.match(f"*-{kda}*.h5")]
-        num_seq_files += len(seq_files)
-        mapped_files[kda] = seq_files
 
-    return mapped_files, num_seq_files
+    if sequences == [-1]:
+        sequences = None
+    if sequences is not None:
+        sequences = set(int(seq) for seq in sequences)
+
+    seq_fn_pat = re.compile(r".*-(?P<da>.*?)-S(?P<seq>.*?)\.h5")
+
+    mapped_files = {kda: [] for kda in karabo_das}
+    total_files = 0
+    for fn in run_dc.select(f"*{karabo_id}*").files:
+        fn = Path(fn.filename)
+        if (match := seq_fn_pat.match(fn.name)) is not None:
+            da = match.group("da")
+            if da in mapped_files and (
+                sequences is None or int(match.group("seq")) in sequences
+            ):
+                mapped_files[da].append(fn)
+                total_files += 1
+
+    return mapped_files, total_files
 
 
 def map_modules_from_folder(in_folder, run, path_template, karabo_da,
-- 
GitLab