diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb index d4fd0bda44be06b09d09c0fa9b6f5dfcc8829ec2..0b5ecb7ea2ada77c4cd3e5add9acec6cd0bf3690 100644 --- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb @@ -41,7 +41,7 @@ "overwrite = True # set to True if existing data should be overwritten\n", "relative_gain = True # do relative gain correction\n", "plt_images = 100 # Number of images to plot after applying selected corrections.\n", - "limit_images = 20 # ONLY FOR TESTING. process only first N images, Use 0 to process all.\n", + "limit_images = 0 # ONLY FOR TESTING. process only first N images, Use 0 to process all.\n", "\n", "\n", "# Parameters for retrieving calibration constants\n", @@ -87,7 +87,7 @@ "from cal_tools.tools import (\n", " get_constant_from_db_and_time,\n", " get_dir_creation_date,\n", - " map_modules_from_folder,\n", + " map_seq_files,\n", " write_compressed_frames,\n", ")\n", "from iCalibrationDB import Conditions, Constants\n", @@ -114,9 +114,6 @@ "else:\n", " out_folder.mkdir(parents=True, exist_ok=True)\n", "\n", - "if sequences[0] == -1:\n", - " sequences = None\n", - "\n", "print(f\"Run is: {run}\")\n", "print(f\"Instrument H5File source: {instrument_src}\")\n", "print(f\"Process modules: {karabo_da}\")\n", @@ -136,40 +133,35 @@ "metadata": {}, "outputs": [], "source": [ - "# set everything up filewise\n", - "mapped_files, _, total_sequences, _, _ = map_modules_from_folder(\n", - " in_folder=in_folder,\n", - " run=run,\n", - " path_template=path_template,\n", - " karabo_da=karabo_da,\n", - " sequences=sequences,\n", - " qm_naming=False,\n", - ")\n", + "# Read available sequence files to correct.\n", + "mapped_files, num_seq_files = map_seq_files(\n", + " run_dc, karabo_id, karabo_da, sequences)\n", "\n", - "print(f\"Processing a total of {total_sequences} sequence 
files\")\n", + "if not len(mapped_files):\n", + " raise IndexError(\n", + " \"No sequence files available to correct for the selected sequences and karabo_da.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Processing a total of {num_seq_files} sequence files\")\n", "table = []\n", "fi = 0\n", - "if total_sequences > 0: # create table\n", - " for i, key in enumerate(mapped_files):\n", - " for k, f in enumerate(list(mapped_files[key].queue)):\n", + "if len(mapped_files) > 0: # create table\n", + " for kda, sfiles in mapped_files.items():\n", + " for k, f in enumerate(sfiles):\n", " if k == 0:\n", - " table.append((fi, karabo_da[i], k, f))\n", + " table.append((fi, kda, k, f))\n", " else:\n", " table.append((fi, \"\", k, f))\n", " fi += 1\n", " md = display(Latex(tabulate.tabulate(\n", " table, tablefmt='latex',\n", - " headers=[\"#\", \"module\", \"# module\", \"file\"])))\n", - "\n", - "# restore the queue\n", - "mapped_files, _, total_sequences, _, _ = map_modules_from_folder(\n", - " in_folder=in_folder,\n", - " run=run,\n", - " path_template=path_template,\n", - " karabo_da=karabo_da,\n", - " sequences=sequences,\n", - " qm_naming=False,\n", - ")" + " headers=[\"#\", \"module\", \"# module\", \"file\"])))" ] }, { @@ -332,6 +324,10 @@ " r_data[index, ...] 
def map_seq_files(
    run_dc: "extra_data.DataCollection",
    karabo_id: str,
    karabo_da: List[str],
    sequences: List[int],
) -> Tuple[dict, int]:
    """Collect the available sequence files for each data aggregator.

    The candidate files are taken from the files of
    ``run_dc.select(f"*{karabo_id}*")`` and matched by file name against
    ``*-{karabo_da}-S{sequence:05d}.h5``.

    :param run_dc: DataCollection (extra-data) of the run to look into.
    :param karabo_id: Detector identifier used to select the sources of
        interest in `run_dc`.
    :param karabo_da: List of data aggregators e.g. [JNGFR01, JNGFR02].
    :param sequences: List of sequence numbers to be considered.
        `[-1]` (or None) selects every available sequence.
    :return: Dictionary with one key per entry of `karabo_da`, holding the
        sorted list of matching sequence file paths (an empty list if
        nothing matched), and the total number of files over all keys.
        As every aggregator gets a key, use the total number of files
        (not the dictionary length) to detect that nothing was found.
    """
    # Sort so the result does not depend on the order in which the
    # DataCollection reports its files.
    sequence_files = sorted(
        Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files)

    # list() makes the comparison safe for tuples and array-likes as well.
    select_all = sequences is None or list(sequences) == [-1]
    if select_all:
        seq_tokens = ["*"]
    else:
        seq_tokens = [f"{s:05d}" for s in sorted({int(s) for s in sequences})]

    num_seq_files = 0
    mapped_files = dict()
    for kda in karabo_da:
        # Always anchor on "-S" right after the aggregator name, otherwise
        # e.g. JNGFR01 would also collect the files of JNGFR010.
        patterns = [f"*-{kda}-S{token}.h5" for token in seq_tokens]
        seq_files = [
            f for f in sequence_files
            if any(f.match(pattern) for pattern in patterns)]
        num_seq_files += len(seq_files)
        mapped_files[kda] = seq_files

    return mapped_files, num_seq_files