From 4fae716d13b8ff9a0f87ee500cebd8f949d8fef5 Mon Sep 17 00:00:00 2001 From: ahmedk <karim.ahmed@xfel.eu> Date: Thu, 2 Dec 2021 15:43:35 +0100 Subject: [PATCH] apply suggested refactors --- ...Jungfrau_Gain_Correct_and_Verify_NBC.ipynb | 35 ++++++-------- ...rk_analysis_all_gains_burst_mode_NBC.ipynb | 2 +- src/cal_tools/tools.py | 47 +++++++++++-------- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb index 5e64c6ae0..bca077b80 100644 --- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb @@ -153,17 +153,16 @@ "print(f\"Processing a total of {num_seq_files} sequence files\")\n", "table = []\n", "fi = 0\n", - "if len(mapped_files) > 0: # create table\n", - " for kda, sfiles in mapped_files.items():\n", - " for k, f in enumerate(sfiles):\n", - " if k == 0:\n", - " table.append((fi, kda, k, f))\n", - " else:\n", - " table.append((fi, \"\", k, f))\n", - " fi += 1\n", - " md = display(Latex(tabulate.tabulate(\n", - " table, tablefmt='latex',\n", - " headers=[\"#\", \"module\", \"# module\", \"file\"])))" + "for kda, sfiles in mapped_files.items():\n", + " for k, f in enumerate(sfiles):\n", + " if k == 0:\n", + " table.append((fi, kda, k, f))\n", + " else:\n", + " table.append((fi, \"\", k, f))\n", + " fi += 1\n", + "md = display(Latex(tabulate.tabulate(\n", + " table, tablefmt='latex',\n", + " headers=[\"#\", \"module\", \"# module\", \"file\"])))" ] }, { @@ -318,7 +317,10 @@ "\n", " if 0 <= index < plt_images:\n", " r_data[index, ...] = d[0, ...]\n", - "\n", + " if memory_cells == 1:\n", + " g_data[index, ...] = g\n", + " else:\n", + " g_data[index, ...] = g[1, ...]\n", " # Select memory cells\n", " \n", " # TODO: This needs to be revisited.\n", @@ -350,13 +352,6 @@ "\n", " msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))\n", "\n", - " # Store sample of data for plotting\n", - " if 0 <= index < plt_images:\n", - " if memory_cells == 1:\n", - " g_data[index, ...] = g\n", - " else:\n", - " g_data[index, ...] = g[1, ...]\n", - "\n", " data_corr[index, ...] = d\n", " mask_corr[index, ...] = msk" ] @@ -418,7 +413,7 @@ "\n", " sensor_size = dshape[1:]\n", " # load number of data available, including trains with empty data.\n", - " n_trains = seq_dc.get_data_counts(instrument_src_kda, \"data.adc\").shape[0]\n", + " n_trains = len(seq_dc.train_ids)\n", "\n", " n_imgs = dshape[0]\n", "\n", diff --git a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb index e24b0cd43..8391912ab 100644 --- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb @@ -217,7 +217,7 @@ " # load shape of data for memory cells, and detector size (imgs, cells, x, y)\n", " n_imgs = run_dc[instrument_src, \"data.adc\"].shape[0]\n", " # load number of data available, including trains with empty data.\n", - " n_trains = run_dc.get_data_counts(instrument_src, \"data.adc\").shape[0]\n", + " n_trains = len(run_dc.train_ids)\n", " instr_dc = run_dc.select(instrument_src, require_all=True)\n", " if n_trains-n_imgs != 0:\n", " print(\n", diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py index 0efce13d7..4613e111f 100644 --- a/src/cal_tools/tools.py +++ b/src/cal_tools/tools.py @@ -61,29 +61,38 @@ def run_prop_seq_from_path(filename): def map_seq_files( run_dc: "extra_data.DataCollection", karabo_id: str, - karabo_da: List[str], - sequences: List[int], + karabo_das: List[str], + sequences: Optional[List[int]] = None, ) -> Tuple[dict, int]: - """ - Using a DataCollection from extra-data collect + + """Using a DataCollection from extra-data to read available sequence files. + + Returns: + Dict: with karabo_das keys and the corresponding sequence files. + Int: for number of all sequence files for all karabo_das to process. """ - num_seq_files = 0 - mapped_files = dict() - sequence_files = [ - Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files] - for kda in karabo_da: - if sequences != [-1]: - seq_files = [ - f for f in sequence_files if any( - f.match(f"*-{kda}-S{s:05d}.h5") for s in sequences)] - else: - seq_files = [ - f for f in sequence_files if f.match(f"*-{kda}*.h5")] - num_seq_files += len(seq_files) - mapped_files[kda] = seq_files - return mapped_files, num_seq_files + if sequences == [-1]: + sequences = None + if sequences is not None: + sequences = set(int(seq) for seq in sequences) + + seq_fn_pat = re.compile(r".*-(?P<da>.*?)-S(?P<seq>.*?)\.h5") + + mapped_files = {kda: [] for kda in karabo_das} + total_files = 0 + for fn in run_dc.select(f"*{karabo_id}*").files: + fn = Path(fn.filename) + if (match := seq_fn_pat.match(fn.name)) is not None: + da = match.group("da") + if da in mapped_files and ( + sequences is None or int(match.group("seq")) in sequences + ): + mapped_files[da].append(fn) + total_files += 1 + + return mapped_files, total_files def map_modules_from_folder(in_folder, run, path_template, karabo_da, -- GitLab