From 42598d7e2dcddefa78e3bb96a5031ee8569c3422 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Wed, 22 May 2024 15:17:35 +0200
Subject: [PATCH 1/8] fix(jungfrau-correct): add keep_data_dims function to
 account for missing modules

---
 ...Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 53 ++++++++++++++-----
 src/cal_tools/tools.py                        | 41 ++++++++++++++
 2 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index d9d393e87..a4ed040bb 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -100,6 +100,7 @@
     "from cal_tools.step_timing import StepTimer\n",
     "from cal_tools.tools import (\n",
     "    calcat_creation_time,\n",
+    "    keep_data_dims,-p\n",
     "    map_seq_files,\n",
     "    write_constants_fragment,\n",
     ")\n",
@@ -789,21 +790,26 @@
     "        detector_name=karabo_id,\n",
     "    ).select_trains(np.s_[:plot_trains])\n",
     "    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))\n",
-    "\n",
+    "det_mod_start = jf_corr._modnos_start_at  # TODO: replace with CALCAT value.\n",
+    "available_modules_indices = list(jf_corr.modno_to_source.keys())\n",
     "# Shape = [modules, trains, cells, x, y]\n",
-    "# TODO: Fix the case if not all modules were requested to be corrected.\n",
-    "# For example if only one modules was corrected. An assertion error is expected\n",
-    "# at `geom.plot_data_fast`, while plotting corrected images.\n",
     "corrected = jf_corr.get_array(\"data.adc\")[:, :, cell_idx_preview, ...].values\n",
-    "corrected_train = jf_corr_data[\"data.adc\"][\n",
-    "    :, cell_idx_preview, ...\n",
-    "].values  # loose the train axis.\n",
+    "corrected_train = keep_data_dims(  # loose the train axis.\n",
+    "    jf_corr_data[\"data.adc\"][:, cell_idx_preview, ...].values,\n",
+    "    geom.expected_data_shape,\n",
+    "    available_modules_indices,\n",
+    "    det_mod_start\n",
+    ")\n",
     "\n",
     "mask = jf_corr.get_array(\"data.mask\")[:, :, cell_idx_preview, ...].values\n",
-    "mask_train = jf_corr_data[\"data.mask\"][:, cell_idx_preview, ...].values\n",
+    "mask_train = keep_data_dims(\n",
+    "    jf_corr_data[\"data.mask\"][:, cell_idx_preview, ...].values,\n",
+    "    geom.expected_data_shape,\n",
+    "    available_modules_indices,\n",
+    "    det_mod_start\n",
+    ")\n",
     "\n",
     "with RunDirectory(f\"{in_folder}/r{run:04d}/\", f\"*S{first_seq:05d}*\", _use_voview=False) as raw_dc:\n",
-    "\n",
     "    # Reading RAW data for plotting.\n",
     "    jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(\n",
     "            np.s_[:plot_trains]\n",
@@ -839,7 +845,12 @@
     "print(f\"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file\")\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "raw_mean = np.mean(raw, axis=1)\n",
+    "raw_mean = keep_data_dims(\n",
+    "    np.mean(raw, axis=1),\n",
+    "    geom.expected_data_shape,\n",
+    "    available_modules_indices,\n",
+    "    det_mod_start\n",
+    ")\n",
     "vmin, vmax = np.percentile(raw_mean, [5, 95])\n",
     "geom.plot_data_fast(\n",
     "    raw_mean,\n",
@@ -868,7 +879,12 @@
     "print(f\"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file\")\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "corrected_mean = np.nanmean(corrected, axis=1)\n",
+    "corrected_mean = keep_data_dims(\n",
+    "    np.nanmean(corrected, axis=1),\n",
+    "    geom.expected_data_shape,\n",
+    "    available_modules_indices,\n",
+    "    det_mod_start\n",
+    ")\n",
     "vmin, vmax = np.nanpercentile(corrected_mean, [5, 95])\n",
     "\n",
     "mean_plot_kwargs = dict(vmin=vmin, vmax=vmax)\n",
@@ -904,7 +920,12 @@
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
     "corrected_masked = corrected.copy()\n",
     "corrected_masked[mask != 0] = np.nan\n",
-    "corrected_masked_mean = np.nanmean(corrected_masked, axis=1)\n",
+    "corrected_masked_mean = keep_data_dims(\n",
+    "    np.nanmean(corrected_masked, axis=1),\n",
+    "    geom.expected_data_shape,\n",
+    "    available_modules_indices,\n",
+    "    det_mod_start\n",
+    ")\n",
     "del corrected_masked\n",
     "\n",
     "if not strixel_sensor:\n",
@@ -1059,7 +1080,13 @@
     "display(Markdown((f\"#### The per pixel maximum of train {tid} of the GAIN data\")))\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "gain_max = np.max(gain_train_cells, axis=(1, 2))\n",
+    "gain_max = keep_data_dims(\n",
+    "    np.max(gain_train_cells, axis=(1, 2)),\n",
+    "    geom.expected_data_shape,\n",
+    "    available_modules_indices,\n",
+    "    det_mod_start\n",
+    ")\n",
+    "\n",
     "geom.plot_data_fast(\n",
     "    gain_max,\n",
     "    ax=ax,\n",
diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py
index d9e5e5f9e..bc71d72bd 100644
--- a/src/cal_tools/tools.py
+++ b/src/cal_tools/tools.py
@@ -1065,3 +1065,44 @@ def raw_data_location_string(proposal: str, runs: List[int]):
             " a preceding 'p'. Example: 'p900203'")
 
     return f"proposal:{proposal} runs:{' '.join(map(str, runs))}"
+
+
+def keep_data_dims(data, expected_shape, data_model_indices, mod_st):
+    """
+    Keep detector dimensions as expected by extra_geom for plotting.
+    This is important in case a module is missing from
+    a multi-modular detector.
+
+    Args:
+        data (np.ndarray): The stacked detector data to extend in case of
+            missing modules.
+        expected_shape (tuple): Expected data shape which the input data
+            should match.
+        data_model_indices (list): Indices for available modules' data.
+        mod_st (int): The index of the first module.
+
+    Returns:
+        (ndarray): Return a stacked detector data based on the expected shape.
+    """
+    if data.shape == expected_shape:
+        return data
+
+    if np.issubdtype(data.dtype, np.integer):
+        fill_value = np.iinfo(data.dtype).min
+    else:
+        fill_value = np.nan
+
+    # Initialize the stacked detector data.
+    expected_data = np.full(
+        expected_shape, dtype=data.dtype, fill_value=fill_value)
+
+    for i, idx in enumerate(data_model_indices):
+        adjusted_idx = idx - mod_st
+        if 0 <= adjusted_idx < expected_shape[0]:
+            expected_data[adjusted_idx] = data[i]
+        else:
+            raise IndexError(
+                f"Index {adjusted_idx} derived from {idx} - {mod_st} is "
+                f"out of bounds for expected shape {expected_shape}")
+
+    return expected_data
-- 
GitLab


From 36b05e7802560f437fe8b341e7035664f8dca919 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Wed, 22 May 2024 15:19:00 +0200
Subject: [PATCH 2/8] test: add tests for keep_data_dims

---
 tests/test_cal_tools.py | 45 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py
index 577a82a4f..e77fec5f6 100644
--- a/tests/test_cal_tools.py
+++ b/tests/test_cal_tools.py
@@ -16,6 +16,7 @@ from cal_tools.tools import (
     get_dir_creation_date,
     get_from_db,
     get_pdu_from_db,
+    keep_data_dims,
     map_seq_files,
     module_index_to_qm,
     raw_data_location_string,
@@ -581,3 +582,47 @@ def test_raise_raw_data_location_string():
 
     with pytest.raises(ValueError):
         raw_data_location_string("900203", [9008, 9009, 9010])
+
+
+def test_keep_data_dims_no_missing_modules():
+    data = np.array([[1., 2.], [3., 4.], [5., 6.]])
+    expected_shape = (3, 2)
+    data_model_indices = [0, 1, 2]
+    mod_st = 0
+    result = keep_data_dims(data, expected_shape, data_model_indices, mod_st)
+    np.testing.assert_array_equal(result, data)
+
+
+def test_keep_data_dims_with_missing_modules():
+    data = np.array([[1, 2], [5, 6]])
+    expected_shape = (3, 2)
+    data_model_indices = [0, 2]
+    mod_st = 0
+    expected_result = np.array(
+        [
+            [1, 2],
+            [np.iinfo(data.dtype).min, np.iinfo(data.dtype).min],
+            [5, 6]
+        ])
+    result = keep_data_dims(data, expected_shape, data_model_indices, mod_st)
+    np.testing.assert_array_equal(result, expected_result)
+
+
+def test_keep_data_dims_different_mod_st():
+    data = np.array([[1., 2.], [5., 6.], [3., 4.],])
+    expected_shape = (5, 2)
+    data_model_indices = [1, 3, 4]
+    mod_st = 1
+    expected_result = np.array(
+        [[1., 2.], [np.nan, np.nan], [5., 6.], [3., 4.], [np.nan, np.nan]])
+    result = keep_data_dims(data, expected_shape, data_model_indices, mod_st)
+    np.testing.assert_array_equal(result, expected_result)
+
+
+def test_keep_data_dims_out_of_bounds_index():
+    data = np.array([[1, 2]])
+    expected_shape = (3, 2)
+    data_model_indices = [3]
+    mod_st = 0
+    with pytest.raises(IndexError):
+        keep_data_dims(data, expected_shape, data_model_indices, mod_st)
-- 
GitLab


From eb300727c961fbd56ad34013e039f790d47a7016 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Thu, 23 May 2024 07:55:12 +0200
Subject: [PATCH 3/8] fix: slipped bug in imports

---
 notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index a4ed040bb..668a7a165 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -100,7 +100,7 @@
     "from cal_tools.step_timing import StepTimer\n",
     "from cal_tools.tools import (\n",
     "    calcat_creation_time,\n",
-    "    keep_data_dims,-p\n",
+    "    keep_data_dims,\n",
     "    map_seq_files,\n",
     "    write_constants_fragment,\n",
     ")\n",
-- 
GitLab


From d59052c40b8ff68cdce1a3ae9ea058a7099abdf1 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Thu, 23 May 2024 14:10:53 +0200
Subject: [PATCH 4/8] fix: drop keep_data_dims function and use functionality
 already available in extra

---
 ...Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 83 +++++++------------
 1 file changed, 28 insertions(+), 55 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index 668a7a165..059e49b4b 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -100,7 +100,6 @@
     "from cal_tools.step_timing import StepTimer\n",
     "from cal_tools.tools import (\n",
     "    calcat_creation_time,\n",
-    "    keep_data_dims,\n",
     "    map_seq_files,\n",
     "    write_constants_fragment,\n",
     ")\n",
@@ -780,51 +779,45 @@
     "step_timer.start()\n",
     "first_seq = 0 if sequences == [-1] else sequences[0]\n",
     "\n",
-    "corrected_files = [\n",
+    "seq_corrected_files = [\n",
     "    out_folder / f for f in fnmatch.filter(corrected_files, f\"*{run}*S{first_seq:05d}*\")\n",
     "]\n",
-    "with DataCollection.from_paths(corrected_files) as corr_dc:\n",
+    "\n",
+    "# TODO: replace with CALCAT value.\n",
+    "if \"1M\" in karabo_id:\n",
+    "    nmods = 2\n",
+    "elif \"4M\" in karabo_id:\n",
+    "    nmods = 8\n",
+    "else:  # 500K\n",
+    "    nmods = 1\n",
+    "\n",
+    "with DataCollection.from_paths(seq_corrected_files) as corr_dc:\n",
     "    # Reading CORR data for plotting.\n",
     "    jf_corr = components.JUNGFRAU(\n",
     "        corr_dc,\n",
     "        detector_name=karabo_id,\n",
+    "        n_modules=nmods,\n",
     "    ).select_trains(np.s_[:plot_trains])\n",
     "    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))\n",
-    "det_mod_start = jf_corr._modnos_start_at  # TODO: replace with CALCAT value.\n",
-    "available_modules_indices = list(jf_corr.modno_to_source.keys())\n",
+    "\n",
     "# Shape = [modules, trains, cells, x, y]\n",
-    "corrected = jf_corr.get_array(\"data.adc\")[:, :, cell_idx_preview, ...].values\n",
-    "corrected_train = keep_data_dims(  # loose the train axis.\n",
-    "    jf_corr_data[\"data.adc\"][:, cell_idx_preview, ...].values,\n",
-    "    geom.expected_data_shape,\n",
-    "    available_modules_indices,\n",
-    "    det_mod_start\n",
-    ")\n",
+    "corrected = jf_corr[\"data.adc\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
+    "corrected_train = jf_corr_data[\"data.adc\"][:, cell_idx_preview, ...]  # loose the train axis.\n",
     "\n",
-    "mask = jf_corr.get_array(\"data.mask\")[:, :, cell_idx_preview, ...].values\n",
-    "mask_train = keep_data_dims(\n",
-    "    jf_corr_data[\"data.mask\"][:, cell_idx_preview, ...].values,\n",
-    "    geom.expected_data_shape,\n",
-    "    available_modules_indices,\n",
-    "    det_mod_start\n",
-    ")\n",
+    "mask = jf_corr[\"data.mask\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
+    "mask_train = jf_corr_data[\"data.mask\"][:, cell_idx_preview, ...]\n",
     "\n",
     "with RunDirectory(f\"{in_folder}/r{run:04d}/\", f\"*S{first_seq:05d}*\", _use_voview=False) as raw_dc:\n",
     "    # Reading RAW data for plotting.\n",
-    "    jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(\n",
-    "            np.s_[:plot_trains]\n",
-    "    )\n",
+    "    jf_raw = components.JUNGFRAU(\n",
+    "        raw_dc, detector_name=karabo_id, n_modules=nmods\n",
+    "        ).select_trains(np.s_[:plot_trains])\n",
     "\n",
-    "raw = jf_raw.get_array(\"data.adc\")[:, :, cell_idx_preview, ...].values\n",
-    "raw_train = (\n",
-    "    jf_raw.select_trains(by_id[[tid]])\n",
-    "    .get_array(\"data.adc\")[:, 0, cell_idx_preview, ...]\n",
-    "    .values\n",
-    ")\n",
+    "raw = jf_raw[\"data.adc\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
     "\n",
-    "gain = jf_raw.get_array(\"data.gain\")[:, :, cell_idx_preview, ...].values\n",
+    "gain = jf_raw[\"data.gain\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
     "gain_train_cells = (\n",
-    "    jf_raw.select_trains(by_id[[tid]]).get_array(\"data.gain\")[:, :, :, ...].values\n",
+    "    jf_raw.select_trains(by_id[[tid]], )[\"data.gain\"].ndarray(module_gaps=True)[:, :, :, ...]\n",
     ")\n",
     "step_timer.done_step(\"Prepared data for plotting\")"
    ]
@@ -845,13 +838,8 @@
     "print(f\"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file\")\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "raw_mean = keep_data_dims(\n",
-    "    np.mean(raw, axis=1),\n",
-    "    geom.expected_data_shape,\n",
-    "    available_modules_indices,\n",
-    "    det_mod_start\n",
-    ")\n",
-    "vmin, vmax = np.percentile(raw_mean, [5, 95])\n",
+    "raw_mean = np.nanmean(raw, axis=1)\n",
+    "vmin, vmax = np.nanpercentile(raw_mean, [5, 95])\n",
     "geom.plot_data_fast(\n",
     "    raw_mean,\n",
     "    ax=ax,\n",
@@ -879,12 +867,7 @@
     "print(f\"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file\")\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "corrected_mean = keep_data_dims(\n",
-    "    np.nanmean(corrected, axis=1),\n",
-    "    geom.expected_data_shape,\n",
-    "    available_modules_indices,\n",
-    "    det_mod_start\n",
-    ")\n",
+    "corrected_mean = np.nanmean(corrected, axis=1)\n",
     "vmin, vmax = np.nanpercentile(corrected_mean, [5, 95])\n",
     "\n",
     "mean_plot_kwargs = dict(vmin=vmin, vmax=vmax)\n",
@@ -920,12 +903,7 @@
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
     "corrected_masked = corrected.copy()\n",
     "corrected_masked[mask != 0] = np.nan\n",
-    "corrected_masked_mean = keep_data_dims(\n",
-    "    np.nanmean(corrected_masked, axis=1),\n",
-    "    geom.expected_data_shape,\n",
-    "    available_modules_indices,\n",
-    "    det_mod_start\n",
-    ")\n",
+    "corrected_masked_mean = np.nanmean(corrected_masked, axis=1)\n",
     "del corrected_masked\n",
     "\n",
     "if not strixel_sensor:\n",
@@ -1080,12 +1058,7 @@
     "display(Markdown((f\"#### The per pixel maximum of train {tid} of the GAIN data\")))\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "gain_max = keep_data_dims(\n",
-    "    np.max(gain_train_cells, axis=(1, 2)),\n",
-    "    geom.expected_data_shape,\n",
-    "    available_modules_indices,\n",
-    "    det_mod_start\n",
-    ")\n",
+    "gain_max = np.max(gain_train_cells, axis=(1, 2))\n",
     "\n",
     "geom.plot_data_fast(\n",
     "    gain_max,\n",
-- 
GitLab


From d90f7f18fab2a7f0f6119604b750ac436ae39043 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Thu, 23 May 2024 14:11:17 +0200
Subject: [PATCH 5/8] test: remove test and unneeded function

---
 src/cal_tools/tools.py  | 41 -------------------------------------
 tests/test_cal_tools.py | 45 -----------------------------------------
 2 files changed, 86 deletions(-)

diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py
index bc71d72bd..d9e5e5f9e 100644
--- a/src/cal_tools/tools.py
+++ b/src/cal_tools/tools.py
@@ -1065,44 +1065,3 @@ def raw_data_location_string(proposal: str, runs: List[int]):
             " a preceding 'p'. Example: 'p900203'")
 
     return f"proposal:{proposal} runs:{' '.join(map(str, runs))}"
-
-
-def keep_data_dims(data, expected_shape, data_model_indices, mod_st):
-    """
-    Keep detector dimensions as expected by extra_geom for plotting.
-    This is important in case a module is missing from
-    a multi-modular detector.
-
-    Args:
-        data (np.ndarray): The stacked detector data to extend in case of
-            missing modules.
-        expected_shape (tuple): Expected data shape which the input data
-            should match.
-        data_model_indices (list): Indices for available modules' data.
-        mod_st (int): The index of the first module.
-
-    Returns:
-        (ndarray): Return a stacked detector data based on the expected shape.
-    """
-    if data.shape == expected_shape:
-        return data
-
-    if np.issubdtype(data.dtype, np.integer):
-        fill_value = np.iinfo(data.dtype).min
-    else:
-        fill_value = np.nan
-
-    # Initialize the stacked detector data.
-    expected_data = np.full(
-        expected_shape, dtype=data.dtype, fill_value=fill_value)
-
-    for i, idx in enumerate(data_model_indices):
-        adjusted_idx = idx - mod_st
-        if 0 <= adjusted_idx < expected_shape[0]:
-            expected_data[adjusted_idx] = data[i]
-        else:
-            raise IndexError(
-                f"Index {adjusted_idx} derived from {idx} - {mod_st} is "
-                f"out of bounds for expected shape {expected_shape}")
-
-    return expected_data
diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py
index e77fec5f6..577a82a4f 100644
--- a/tests/test_cal_tools.py
+++ b/tests/test_cal_tools.py
@@ -16,7 +16,6 @@ from cal_tools.tools import (
     get_dir_creation_date,
     get_from_db,
     get_pdu_from_db,
-    keep_data_dims,
     map_seq_files,
     module_index_to_qm,
     raw_data_location_string,
@@ -582,47 +581,3 @@ def test_raise_raw_data_location_string():
 
     with pytest.raises(ValueError):
         raw_data_location_string("900203", [9008, 9009, 9010])
-
-
-def test_keep_data_dims_no_missing_modules():
-    data = np.array([[1., 2.], [3., 4.], [5., 6.]])
-    expected_shape = (3, 2)
-    data_model_indices = [0, 1, 2]
-    mod_st = 0
-    result = keep_data_dims(data, expected_shape, data_model_indices, mod_st)
-    np.testing.assert_array_equal(result, data)
-
-
-def test_keep_data_dims_with_missing_modules():
-    data = np.array([[1, 2], [5, 6]])
-    expected_shape = (3, 2)
-    data_model_indices = [0, 2]
-    mod_st = 0
-    expected_result = np.array(
-        [
-            [1, 2],
-            [np.iinfo(data.dtype).min, np.iinfo(data.dtype).min],
-            [5, 6]
-        ])
-    result = keep_data_dims(data, expected_shape, data_model_indices, mod_st)
-    np.testing.assert_array_equal(result, expected_result)
-
-
-def test_keep_data_dims_different_mod_st():
-    data = np.array([[1., 2.], [5., 6.], [3., 4.],])
-    expected_shape = (5, 2)
-    data_model_indices = [1, 3, 4]
-    mod_st = 1
-    expected_result = np.array(
-        [[1., 2.], [np.nan, np.nan], [5., 6.], [3., 4.], [np.nan, np.nan]])
-    result = keep_data_dims(data, expected_shape, data_model_indices, mod_st)
-    np.testing.assert_array_equal(result, expected_result)
-
-
-def test_keep_data_dims_out_of_bounds_index():
-    data = np.array([[1, 2]])
-    expected_shape = (3, 2)
-    data_model_indices = [3]
-    mod_st = 0
-    with pytest.raises(IndexError):
-        keep_data_dims(data, expected_shape, data_model_indices, mod_st)
-- 
GitLab


From 8ffe7a6c28b5cd3d3114e1e9ae6df08f03b20772 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Mon, 27 May 2024 11:27:07 +0200
Subject: [PATCH 6/8] fix: move to using xarray for simplicity

---
 ...Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index 059e49b4b..92be5a793 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -801,23 +801,23 @@
     "    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))\n",
     "\n",
     "# Shape = [modules, trains, cells, x, y]\n",
-    "corrected = jf_corr[\"data.adc\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
+    "corrected = jf_corr.get_array(\"data.adc\")[:, :, cell_idx_preview, ...]\n",
     "corrected_train = jf_corr_data[\"data.adc\"][:, cell_idx_preview, ...]  # loose the train axis.\n",
     "\n",
-    "mask = jf_corr[\"data.mask\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
+    "mask = jf_corr.get_array(\"data.mask\")[:, :, cell_idx_preview, ...]\n",
     "mask_train = jf_corr_data[\"data.mask\"][:, cell_idx_preview, ...]\n",
     "\n",
     "with RunDirectory(f\"{in_folder}/r{run:04d}/\", f\"*S{first_seq:05d}*\", _use_voview=False) as raw_dc:\n",
     "    # Reading RAW data for plotting.\n",
     "    jf_raw = components.JUNGFRAU(\n",
-    "        raw_dc, detector_name=karabo_id, n_modules=nmods\n",
+    "        raw_dc, detector_name=karabo_id, n_modules=nmods, first_modno=jf_corr._modnos_start_at\n",
     "        ).select_trains(np.s_[:plot_trains])\n",
     "\n",
-    "raw = jf_raw[\"data.adc\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
+    "raw = jf_raw.get_array(\"data.adc\")[:, :, cell_idx_preview, ...]\n",
     "\n",
-    "gain = jf_raw[\"data.gain\"].ndarray(module_gaps=True)[:, :, cell_idx_preview, ...]\n",
+    "gain = jf_raw.get_array(\"data.gain\")[:, :, cell_idx_preview, ...]\n",
     "gain_train_cells = (\n",
-    "    jf_raw.select_trains(by_id[[tid]], )[\"data.gain\"].ndarray(module_gaps=True)[:, :, :, ...]\n",
+    "    jf_raw.select_trains(by_id[[tid]]).get_array(\"data.gain\")[:, :, :, ...]\n",
     ")\n",
     "step_timer.done_step(\"Prepared data for plotting\")"
    ]
@@ -838,7 +838,7 @@
     "print(f\"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file\")\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "raw_mean = np.nanmean(raw, axis=1)\n",
+    "raw_mean = raw.mean(skipna=True, axis=1)\n",
     "vmin, vmax = np.nanpercentile(raw_mean, [5, 95])\n",
     "geom.plot_data_fast(\n",
     "    raw_mean,\n",
@@ -867,7 +867,7 @@
     "print(f\"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file\")\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "corrected_mean = np.nanmean(corrected, axis=1)\n",
+    "corrected_mean = corrected.mean(skipna=True, axis=1, keep_attrs=True)\n",
     "vmin, vmax = np.nanpercentile(corrected_mean, [5, 95])\n",
     "\n",
     "mean_plot_kwargs = dict(vmin=vmin, vmax=vmax)\n",
@@ -902,8 +902,8 @@
    "source": [
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
     "corrected_masked = corrected.copy()\n",
-    "corrected_masked[mask != 0] = np.nan\n",
-    "corrected_masked_mean = np.nanmean(corrected_masked, axis=1)\n",
+    "corrected_masked = corrected_masked.where(mask == 0, np.nan)  # where() returns a copy; keep unmasked pixels only\n",
+    "corrected_masked_mean = corrected_masked.mean(skipna=True, axis=1)\n",
     "del corrected_masked\n",
     "\n",
     "if not strixel_sensor:\n",
@@ -999,8 +999,8 @@
     "for i, mod in enumerate(karabo_da):\n",
     "    pdu = da_to_pdu[mod]\n",
     "    h, ex, ey = np.histogram2d(\n",
-    "        raw[i].flatten(),\n",
-    "        gain[i].flatten(),\n",
+    "        raw[i].values.flatten(),\n",
+    "        gain[i].values.flatten(),\n",
     "        bins=[100, 4],\n",
     "        range=[[0, 10000], [0, 4]],\n",
     "    )\n",
@@ -1029,7 +1029,7 @@
     "for i, mod in enumerate(karabo_da):\n",
     "    pdu = da_to_pdu[mod]\n",
     "    fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))\n",
-    "    corrected_flatten = corrected[i].flatten()\n",
+    "    corrected_flatten = corrected[i].values.flatten()\n",
     "    for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):\n",
     "        h = ax.hist(\n",
     "            corrected_flatten,\n",
@@ -1058,7 +1058,7 @@
     "display(Markdown((f\"#### The per pixel maximum of train {tid} of the GAIN data\")))\n",
     "\n",
     "fig, ax = plt.subplots(figsize=(18, 10))\n",
-    "gain_max = np.max(gain_train_cells, axis=(1, 2))\n",
+    "gain_max = gain_train_cells.max(skipna=True, axis=(1, 2))\n",
     "\n",
     "geom.plot_data_fast(\n",
     "    gain_max,\n",
-- 
GitLab


From 525daec3259e8043e688539237c8ae5863c2fabc Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Mon, 27 May 2024 11:31:48 +0200
Subject: [PATCH 7/8] fix: remove unneeded changes

---
 .../Jungfrau_Gain_Correct_and_Verify_NBC.ipynb        | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index 92be5a793..49feebf94 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -783,20 +783,11 @@
     "    out_folder / f for f in fnmatch.filter(corrected_files, f\"*{run}*S{first_seq:05d}*\")\n",
     "]\n",
     "\n",
-    "# TODO: replace with CALCAT value.\n",
-    "if \"1M\" in karabo_id:\n",
-    "    nmods = 2\n",
-    "elif \"4M\" in karabo_id:\n",
-    "    nmods = 8\n",
-    "else:  # 500K\n",
-    "    nmods = 1\n",
-    "\n",
     "with DataCollection.from_paths(seq_corrected_files) as corr_dc:\n",
     "    # Reading CORR data for plotting.\n",
     "    jf_corr = components.JUNGFRAU(\n",
     "        corr_dc,\n",
     "        detector_name=karabo_id,\n",
-    "        n_modules=nmods,\n",
     "    ).select_trains(np.s_[:plot_trains])\n",
     "    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))\n",
     "\n",
@@ -810,7 +801,7 @@
     "with RunDirectory(f\"{in_folder}/r{run:04d}/\", f\"*S{first_seq:05d}*\", _use_voview=False) as raw_dc:\n",
     "    # Reading RAW data for plotting.\n",
     "    jf_raw = components.JUNGFRAU(\n",
-    "        raw_dc, detector_name=karabo_id, n_modules=nmods, first_modno=jf_corr._modnos_start_at\n",
+    "        raw_dc, detector_name=karabo_id\n",
     "        ).select_trains(np.s_[:plot_trains])\n",
     "\n",
     "raw = jf_raw.get_array(\"data.adc\")[:, :, cell_idx_preview, ...]\n",
-- 
GitLab


From b8aae122d04283bb23a55f5aab40d14d39036119 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Mon, 27 May 2024 13:31:39 +0200
Subject: [PATCH 8/8] remove unavailable da from karabo_da

---
 .../Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index 49feebf94..cdb69ea35 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -426,6 +426,9 @@
     "        else:\n",
     "            table.append((fi, \"\", k,  f))\n",
     "        fi += 1\n",
+    "# Remove DAs which don't have data.\n",
+    "karabo_da = [da for da in karabo_da if da in mapped_files and mapped_files[da]]\n",
+    "\n",
     "md = display(Latex(tabulate.tabulate(\n",
     "    table, tablefmt='latex',\n",
     "    headers=[\"#\", \"module\", \"# module\", \"file\"])))"
@@ -783,11 +786,20 @@
     "    out_folder / f for f in fnmatch.filter(corrected_files, f\"*{run}*S{first_seq:05d}*\")\n",
     "]\n",
     "\n",
+    "# TODO: replace with CALCAT value.\n",
+    "if \"1M\" in karabo_id:\n",
+    "    nmods = 2\n",
+    "elif \"4M\" in karabo_id:\n",
+    "    nmods = 8\n",
+    "else:  # 500K\n",
+    "    nmods = 1\n",
+    "\n",
     "with DataCollection.from_paths(seq_corrected_files) as corr_dc:\n",
     "    # Reading CORR data for plotting.\n",
     "    jf_corr = components.JUNGFRAU(\n",
     "        corr_dc,\n",
     "        detector_name=karabo_id,\n",
+    "        n_modules=nmods,\n",
     "    ).select_trains(np.s_[:plot_trains])\n",
     "    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))\n",
     "\n",
@@ -801,8 +813,10 @@
     "with RunDirectory(f\"{in_folder}/r{run:04d}/\", f\"*S{first_seq:05d}*\", _use_voview=False) as raw_dc:\n",
     "    # Reading RAW data for plotting.\n",
     "    jf_raw = components.JUNGFRAU(\n",
-    "        raw_dc, detector_name=karabo_id\n",
-    "        ).select_trains(np.s_[:plot_trains])\n",
+    "        raw_dc,\n",
+    "        detector_name=karabo_id,\n",
+    "        n_modules=nmods,\n",
+    "    ).select_trains(np.s_[:plot_trains])\n",
     "\n",
     "raw = jf_raw.get_array(\"data.adc\")[:, :, cell_idx_preview, ...]\n",
     "\n",
-- 
GitLab