remove unavailable da from karabo_da

b8aae122 · Karim Ahmed · 525daec3 · b8aae122
Commit b8aae122 authored 9 months ago by Karim Ahmed
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -426,6 +426,9 @@
    "        else:\n",
    "            table.append((fi, \"\", k,  f))\n",
    "        fi += 1\n",
+    "# remove da which doesnt have data.\n",
+    "karabo_da = [da for da in karabo_da if da in mapped_files and mapped_files[da]]\n",
+    "\n",
    "md = display(Latex(tabulate.tabulate(\n",
    "    table, tablefmt='latex',\n",
    "    headers=[\"#\", \"module\", \"# module\", \"file\"])))"
@@ -783,11 +786,20 @@
    "    out_folder / f for f in fnmatch.filter(corrected_files, f\"*{run}*S{first_seq:05d}*\")\n",
    "]\n",
    "\n",
+    "# TODO: replace with CALCAT value.\n",
+    "if \"1M\" in karabo_id:\n",
+    "    nmods = 2\n",
+    "elif \"4M\" in karabo_id:\n",
+    "    nmods = 8\n",
+    "else:  # 500K\n",
+    "    nmods = 1\n",
+    "\n",
    "with DataCollection.from_paths(seq_corrected_files) as corr_dc:\n",
    "    # Reading CORR data for plotting.\n",
    "    jf_corr = components.JUNGFRAU(\n",
    "        corr_dc,\n",
    "        detector_name=karabo_id,\n",
+    "        n_modules=nmods,\n",
    "    ).select_trains(np.s_[:plot_trains])\n",
    "    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))\n",
    "\n",
@@ -801,8 +813,10 @@
    "with RunDirectory(f\"{in_folder}/r{run:04d}/\", f\"*S{first_seq:05d}*\", _use_voview=False) as raw_dc:\n",
    "    # Reading RAW data for plotting.\n",
    "    jf_raw = components.JUNGFRAU(\n",
-    "        raw_dc, detector_name=karabo_id\n",
-    "        ).select_trains(np.s_[:plot_trains])\n",
+    "        raw_dc,\n",
+    "        detector_name=karabo_id,\n",
+    "        n_modules=nmods,\n",
+    "    ).select_trains(np.s_[:plot_trains])\n",
    "\n",
    "raw = jf_raw.get_array(\"data.adc\")[:, :, cell_idx_preview, ...]\n",
    "\n",

 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/HED/202331/p900360/raw"  # the folder to read data from, required
 out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/remove/jungfrau"  # the folder to output to, required
 run = 20  # run to process, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed
 sequences_per_node = 1  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel

 # Parameters used to access raw data.
 karabo_id = "HED_IA1_JF500K4"  # karabo prefix of Jungfrau devices
 karabo_da = ['JNGFR04']  # data aggregators
 receiver_template = "JNGFR{:02d}"  # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)
 karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id

 # Parameters for calibration database.
 cal_db_interface = "tcp://max-exfl-cal001:8017#8025" # the database interface to use
 cal_db_timeout = 180000  # timeout on caldb requests
 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"

 # Parameters affecting corrected data.
 relative_gain = True  # do relative gain correction.
 strixel_sensor = ""  # reordering for strixel detector layout. Possible strixels to choose from are A0123 and A1256.
 strixel_double_norm = 2.0  # normalization to use for double-size pixels, only applied for strixel sensors.
 limit_trains = 0  # ONLY FOR TESTING. process only first N trains, Use 0 to process all.
 chunks_ids = 32  # HDF chunk size for memoryCell and frameNumber.
 chunks_data = 1  # HDF chunk size for pixel data in number of frames.
 wrong_gain_pixels = [-1]  # List of 5 values (e.g. [4, 0, 255, 896, 1024]) defining the module number (4 for JNGFR04). And using the indexes of the FEM row [pixel_x_0:pixel_x_1] and column [pixel_y_0:pixel_y_1]. Set to -1 to not pick pixels for gain replacement.
 replace_wrong_gain_value = 0  # Force gain value into the chosen gain [0, 1, or 2] for pixels specified in `wrong_gain_pixels`. This has no effect if wrong_gain_pixels = [-1]

 # Parameters for retrieving calibration constants
 integration_time = -1  # integration time in us. set to -1 to overwrite by value in file.
 gain_setting = -1  # 0 for dynamic gain, 1 for dynamic HG0. set to -1 to overwrite by value in file.
 gain_mode = -1  # 0 for runs with dynamic gain setting, 1 for fixed gain. Set to -1 to overwrite by value in file.
 mem_cells = -1  # Set mem_cells to -1 to automatically use the value stored in RAW data.
 bias_voltage = -1  # Bias Voltage. Set to -1 to overwrite by value in file.

 # Parameters for plotting
 skip_plots = False  # exit after writing corrected files
 plot_trains = 500  # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
 cell_id_preview = 15  # cell Id used for preview in single-shot plots

 # Parameters for ROI selection and reduction
 roi_definitions = [-1]  # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
 roi_threshold = 2.  # Corrected pixels below the threshold will be excluded from ROI projections. Set to -1 to include all pixels.


 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import fnmatch
 import multiprocessing
 import sys
 import warnings
 from logging import warning
 from pathlib import Path

 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pasha as psh
 import tabulate
 from extra_data import DataCollection, H5File, RunDirectory, by_id, components
 from IPython.display import Latex, Markdown, display
 from matplotlib.colors import LogNorm

 import cal_tools.restful_config as rest_cfg
 from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
 from cal_tools.enums import BadPixels
 from cal_tools.files import DataFile
 from cal_tools.jungfrau.jungfraulib import JungfrauCtrl
 from cal_tools.plotting import init_jungfrau_geom
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
    map_seq_files,
    write_constants_fragment,
 )

 warnings.filterwarnings('ignore')

 matplotlib.use('agg')
 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'
 run_dc = RunDirectory(run_folder)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 out_folder.mkdir(parents=True, exist_ok=True)

 print(f"Run is: {run}")
 print(f"Instrument H5File source: {instrument_src}")
 karabo_da = sorted(karabo_da)
 print(f"Process modules: {karabo_da}")

 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 print(f"Creation time: {creation_time}")

 if karabo_id_control == "":
    karabo_id_control = karabo_id

 if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
    print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
          f"Axis numbers given: {roi_definitions[5::6]}")
    sys.exit(1)
 ```

 %% Cell type:code id: tags:

 ``` python
 ctrl_src = ctrl_source_template.format(karabo_id_control)
 ctrl_data = JungfrauCtrl(run_dc, ctrl_src)

 if mem_cells < 0:
    memory_cells, sc_start = ctrl_data.get_memory_cells()

    mem_cells_name = "single cell" if memory_cells == 1 else "burst"
    print(f"Number of memory cells are {memory_cells}")
    print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
 else:
    memory_cells = mem_cells
    mem_cells_name = "single cell" if memory_cells == 1 else "burst"
    print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")

 if integration_time < 0:
    integration_time = ctrl_data.get_integration_time()
    print(f"Integration time is {integration_time} us")
 else:
    print(f"Integration time is manually set to {integration_time} us")

 if bias_voltage < 0:
    bias_voltage = ctrl_data.get_bias_voltage()
    print(f"Bias voltage is {bias_voltage} V")
 else:
    print(f"Bias voltage is manually set to {bias_voltage} V.")

 if gain_setting < 0:
    gain_setting = ctrl_data.get_gain_setting()
    print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
 else:
    print(f"Gain setting is manually set to {gain_setting}.")

 force_fixed_gain_constants_flag = False
 if gain_mode < 0:
    gain_mode = ctrl_data.get_gain_mode()
    print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
    # JF corrections in burst mode are only supported when no gain switching occurs.
    # Always retrieve fixed gain constant for burst mode.
    if gain_mode == 0 and memory_cells > 1:
        print("By default fixed gain constant will be retrieved for burst mode data,"
            " even for dynamic gain data.")
        force_fixed_gain_constants_flag = True
 else:
    print(f"Gain mode is manually set to {gain_mode}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 def jungfrau_cal_mdata(gm):
    jf_cal = JUNGFRAU_CalibrationData(
        detector_name=karabo_id,
        sensor_bias_voltage=bias_voltage,
        event_at=creation_time,
        modules=karabo_da,
        memory_cells=memory_cells,
        integration_time=integration_time,
        gain_setting=gain_setting,
        gain_mode=gm,
        client=rest_cfg.calibration_client(),
    )

    constant_names = ["Offset10Hz", "BadPixelsDark10Hz"]
    if relative_gain:
        constant_names += ["BadPixelsFF10Hz", "RelativeGain10Hz"]
    jf_metadata = jf_cal.metadata(calibrations=constant_names)
    # Display retrieved calibration constants timestamps
    jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)
    return jf_cal, jf_metadata

 def force_fixed_gain_constants():
    """JF corrections in burst mode are only supported when
    no gain switching occurs. Always retrieve fixed gain
    constant for burst mode.
    https://git.xfel.eu/calibration/planning/-/issues/196

    Returns:
        dict: The metadata with the jungfrau retrieved constants.
            {mod: {cname: ccv_metadata}}
    """
    from datetime import datetime

    from cal_tools.calcat_interface import CalCatError

    try:
        jf_cal, jf_metadata = jungfrau_cal_mdata(gm=1)
    except CalCatError as e:
        warning(
            "No fixed gain constants found. "
            "Looking for dynamic gain constant. "
            f"(CalCatError: {e}.")
    else:
        return jf_cal, jf_metadata

    # In case of CALCATError exception look for dynamic gain constants
    jf_cal, jf_metadata = jungfrau_cal_mdata(gm=0)

    for mod, ccvs in jf_metadata.items():
        offset = ccvs.get("Offset10Hz")
        if not offset:  # This module wont be corrected later after validating constants.
            continue
        time_difference = creation_time - datetime.fromisoformat(offset["begin_validity_at"])
        if abs(time_difference.days) > 3:
            warning(
                f"No dynamic gain constant retrieved for {mod} with at least"
                " 3 days time difference with the RAW data creation date."
                " Please make sure there are available constants.")
            jf_metadata[mod].pop("Offset10Hz")

    return jf_cal, jf_metadata
 ```

 %% Cell type:markdown id: tags:

 ### Retrieving calibration constants

 %% Cell type:code id: tags:

 ``` python
 if force_fixed_gain_constants_flag:
    jf_cal, jf_metadata = force_fixed_gain_constants()
 else:
    jf_cal, jf_metadata = jungfrau_cal_mdata(gain_mode)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate the constants availability and raise/warn correspondingly.
 for mod in karabo_da[:]:
    calibrations = jf_metadata.get(mod, {})

    missing_dark_constants = {"Offset10Hz", "BadPixelsDark10Hz"} - set(calibrations)
    missing_gain_constants = {"BadPixelsFF10Hz", "RelativeGain10Hz"} - set(calibrations)

    if missing_dark_constants:
        warning(
            f"Dark constants {missing_dark_constants} are not available to correct {mod}."
            f" Module {mod} won't be corrected.")
        karabo_da.remove(mod)

    if relative_gain and missing_gain_constants:
        warning(f"Gain constants {missing_gain_constants} were not retrieved for {mod}."
                " No Relative gain correction for this module")
 if not karabo_da:  # Dark constants are missing for all modules.
    raise ValueError("Dark constants are missing for all modules.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=jf_metadata,
    caldb_root=jf_cal.caldb_root)


 # load constants arrays after storing fragment YAML file
 # and validating constants availability.
 const_data = jf_cal.ndarray_map(metadata=jf_metadata)

 # For plotting
 da_to_pdu = {}
 for mod_info in jf_cal.physical_detector_units.values():
    da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]
 ```

 %% Cell type:code id: tags:

 ``` python
 def prepare_constants(module: str):
    """Prepare constant arrays.

    :param module: The module name (karabo_da)
    :return:
        offset_map (offset map),
        mask (mask of bad pixels),
        gain_map (map of relative gain factors),
        module (name of module),
    """
    constant_arrays = const_data[module]
    offset_map = constant_arrays["Offset10Hz"]
    mask = constant_arrays["BadPixelsDark10Hz"]

    gain_map = constant_arrays.get("RelativeGain10Hz")
    mask_ff = constant_arrays.get("BadPixelsFF10Hz")

    # Combine masks
    if mask_ff is not None:
        mask |= np.moveaxis(mask_ff, 0, 1)

    if memory_cells > 1:
        # move from x, y, cell, gain to cell, x, y, gain
        offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
        mask = np.moveaxis(mask, [0, 1], [1, 2])
    else:
        offset_map = np.squeeze(offset_map)
        mask = np.squeeze(mask)

    # masking double size pixels
    mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
    mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE

    if gain_map is not None:
        if memory_cells > 1:
            gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
            # add extra empty cell constant
            b = np.ones(((1,)+gain_map.shape[1:]))
            gain_map = np.concatenate((gain_map, b), axis=0)
        else:
            gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)

    return offset_map, mask, gain_map, module

 with multiprocessing.Pool() as pool:
    r = pool.map(prepare_constants, karabo_da)

 # Print timestamps for the retrieved constants.
 constants = {}
 for offset_map, mask, gain_map, k_da in r:

    constants[k_da] = (offset_map, mask, gain_map)

 const_data.clear()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Read available sequence files to correct.
 mapped_files, num_seq_files = map_seq_files(
    run_folder, karabo_da, sequences)

 if not len(mapped_files):
    raise IndexError(
        "No sequence files available to correct for the selected sequences and karabo_da.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Processing a total of {num_seq_files} sequence files")
 table = []
 fi = 0
 for kda, sfiles in mapped_files.items():
    for k, f in enumerate(sfiles):
        if k == 0:
            table.append((fi, kda, k, f))
        else:
            table.append((fi, "", k,  f))
        fi += 1
+# remove da which doesnt have data.
+karabo_da = [da for da in karabo_da if da in mapped_files and mapped_files[da]]
+
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["#", "module", "# module", "file"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 if strixel_sensor:
    from cal_tools.jungfrau.jfstrixel import get_strixel_parameters, to_strixel
    strx_params = get_strixel_parameters(strixel_sensor)

    strixel_shape = strx_params["frame_shape"]
    Ydouble = strx_params.get("ydouble", slice(None))
    Xdouble = strx_params.get("xdouble", slice(None))

    print('Strixel sensor transformation enabled')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Correct a chunk of images for offset and gain
 def correct_train(wid, index, d):
    d = d.astype(np.float32)  # [cells, x, y]
    g = gain[index]

    # Copy gain over first to keep it at the original 3 for low gain.
    if strixel_sensor:
        to_strixel(g, out=gain_corr[index, ...], kind=strixel_sensor)
    else:
        gain_corr[index, ...] = g

    # Jungfrau gains 0[00], 1[01], 3[11]
    # Change low gain to 2 for indexing purposes.
    g[g==3] = 2

    # A fix for a module hardware problem (e.g. Jungfrau_M302)
    # of chip being stuck on the wrong gain bit.
    if (
        wrong_gain_pixels[0] > -1 and
        wrong_gain_pixels[0] == int(local_karabo_da[-2:])
    ):
        x1 = wrong_gain_pixels[1]
        x2 = wrong_gain_pixels[2]
        y1 = wrong_gain_pixels[3]
        y2 = wrong_gain_pixels[4]
        g[:, x1:x2, y1:y2] = replace_wrong_gain_value

    # Select memory cells
    if memory_cells > 1:
        """
        Even though it is correct to assume that memory cells pattern
        can be the same across all trains (for one correction run
        taken with one acquisition), it is preferred to not assume
        this to account for exceptions that can happen.
        """
        m = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # Corresponding image will be corrected with constant of cell 0. To avoid values of 0.
        # This line is depending on not storing the modified memory cells in the corrected data.
        m[m==255] = 0

        offset_map_cell = offset_map[m, ...]  # [16 + empty cell, x, y]
        mask_cell = mask[m, ...]
    else:
        offset_map_cell = offset_map
        mask_cell = mask

    # Offset correction
    offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))

    d -= offset

    # Gain correction
    if relative_gain and gain_map is not None:
        if memory_cells > 1:
            gain_map_cell = gain_map[m, ...]
        else:
            gain_map_cell = gain_map
        cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
        d /= cal

    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))

    if strixel_sensor:
        to_strixel(d, out=data_corr[index, ...], kind=strixel_sensor)
        data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
        to_strixel(msk, out=mask_corr[index, ...], kind=strixel_sensor)
    else:
        data_corr[index, ...] = d
        mask_corr[index, ...] = msk
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer = StepTimer()

 n_cpus = multiprocessing.cpu_count()
 context = psh.context.ProcessContext(num_workers=n_cpus)
 print(f"Using {n_cpus} workers for correction.")
 ```

 %% Cell type:code id: tags:

 ``` python
 def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
    """If ROIs are defined for this karabo_da, reduce them and save to the output file"""
    rois_defined = 0
    module_no = int(karabo_da[-2:])
    params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
    rois_source = f'{params_source}:output'
    if roi_definitions != [-1]:
        # Create Instrument and Control sections to later add datasets.
        outp_source = ofile.create_instrument_source(rois_source)
        ctrl_source = ofile.create_control_source(params_source)
    for i in range(len(roi_definitions) // 6):
        roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
        if roi_module == module_no:
            rois_defined += 1
            # Set pixels below the threshold to 0 (but still used in the averaging)
            roi_data = data_corr[..., a1:a2, b1:b2]
            if roi_threshold > -1:
                roi_data = roi_data * (roi_data > roi_threshold)
            # Apply the mask and average remaining pixels to 1D
            roi_data = roi_data.mean(
                axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
            )

            # Add roi corrected datasets
            outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)

            # Add roi run control datasets.
            ctrl_source.create_run_key(f'roi{rois_defined}.region', np.array([[a1, a2, b1, b2]]))
            ctrl_source.create_run_key(f'roi{rois_defined}.reduce_axis', np.array([mean_axis]))
            ctrl_source.create_run_key(f'roi{rois_defined}.threshold', np.array([roi_threshold], dtype=np.float32))

    if rois_defined:
        # Copy the index for the new source
        # Create count/first datasets at INDEX source.
        ofile.copy(f'INDEX/{karabo_id}/DET/{karabo_da}:daqOutput/data',
                   f'INDEX/{rois_source}/data')
        ntrains = ofile['INDEX/trainId'].shape[0]
        ctrl_source.create_index(ntrains)
 ```

 %% Cell type:markdown id: tags:

 ### Correcting RAW data ###

 %% Cell type:code id: tags:

 ``` python
 # Loop over modules
 empty_seq = 0
 corrected_files = []
 for local_karabo_da, mapped_files_module in mapped_files.items():
    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))

    for sequence_file in mapped_files_module:
        # Save corrected data in an output file with name
        # of corresponding raw sequence file.
        ofile_name = sequence_file.name.replace("RAW", "CORR")
        out_file = out_folder / ofile_name
        corrected_files.append(ofile_name)

        # Load sequence file data collection, data.adc keydata,
        # the shape for data to later created arrays of the same shape,
        # and number of available trains to correct.
        seq_dc = H5File(sequence_file)
        seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
        ishape = seq_dc_adc.shape  # input shape.
        corr_ntrains = ishape[0]  # number of available trains to correct.
        all_train_ids = seq_dc_adc.train_ids

        # Raise a WARNING if this sequence has no trains to correct.
        # Otherwise, print number of trains with no data.
        if corr_ntrains == 0:
            warning(f"No trains to correct for {sequence_file.name}: "
                 "Skipping the processing of this file.")
            empty_seq += 1
            continue
        elif len(all_train_ids) != corr_ntrains:
            print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
                  "trains with missing data.")

        # For testing, limit corrected trains. i.e. Getting output faster.
        if limit_trains > 0:
            print(f"\nCorrected trains are limited to: {limit_trains} trains")
            corr_ntrains = min(corr_ntrains, limit_trains)

        print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")

        # Load constants from the constants dictionary.
        # These arrays are used by `correct_train()` function
        offset_map, mask, gain_map = constants[local_karabo_da]

        # Determine total output shape.
        if strixel_sensor:
            oshape = (*ishape[:-2], *strixel_shape)
        else:
            oshape = ishape

        # Allocate shared arrays for corrected data. Used in `correct_train()`
        data_corr = context.alloc(shape=oshape, dtype=np.float32)
        gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
        mask_corr = context.alloc(shape=oshape, dtype=np.uint32)

        step_timer.start()
        # Overwrite seq_dc after eliminating empty trains or/and applying limited images.
        seq_dc = seq_dc.select(
            instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])

        # Load raw images(adc), gain, memcells, and frame numbers.
        data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
        gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
        memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
        frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()

        # Validate that the selected cell id to preview is available in raw data.
        if memory_cells > 1:
            # For plotting, assuming that memory cells are sorted the same for all trains.
            found_cells = memcells[0] == cell_id_preview
            if any(found_cells):
                cell_idx_preview = np.where(found_cells)[0][0]
            else:
                print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
                      f"Previewing cell `{memcells[0]}`.")
                cell_idx_preview = 0
        else:
            cell_idx_preview = 0

        # Correct data per train
        context.map(correct_train, data)
        step_timer.done_step(f"Correction time.")

        step_timer.start()

        # Create CORR files and add corrected data sections.
        image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)

        seq_file = seq_dc.files[0]  # FileAccess
        with DataFile(out_file, 'w') as outp_file:
            # Create INDEX datasets.
            outp_file.create_index(seq_dc.train_ids, from_file=seq_file)

            # Create Instrument section to later add corrected datasets.
            outp_source = outp_file.create_instrument_source(instrument_src_kda)

            # Create count/first datasets at INDEX source.
            outp_source.create_index(data=image_counts)

            # RAW memoryCell and frameNumber are not corrected. But we are storing only
            # the values for the corrected trains.
            outp_source.create_key(
                "data.memoryCell", data=memcells,
                chunks=(min(chunks_ids, memcells.shape[0]), 1))
            outp_source.create_key(
                "data.frameNumber", data=frame_number,
                chunks=(min(chunks_ids, frame_number.shape[0]), 1))
            # Add main corrected `data.adc`` dataset and store corrected data.
            outp_source.create_key(
                "data.adc", data=data_corr,
                chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
            outp_source.create_compressed_key(
                "data.gain", data=gain_corr)
            outp_source.create_compressed_key(
                "data.mask", data=mask_corr)

            # Temporary hotfix for FXE assuming this dataset is in corrected files.
            outp_source.create_key(
                "data.trainId", data=seq_dc.train_ids,
                chunks=(min(50, len(seq_dc.train_ids))))

            save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)

            # Create METDATA datasets
            outp_file.create_metadata(
                like=seq_dc,
                sequence=seq_file.sequence,
            )

        step_timer.done_step(f'Saving data time.')
 if empty_seq == sum([len(i) for i in mapped_files.values()]):
    warning("No valid trains for RAW data to correct.")
    sys.exit(0)
 ```

 %% Cell type:markdown id: tags:

 ### Processing time summary ###

 %% Cell type:code id: tags:

 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 _, geom = init_jungfrau_geom(karabo_id=karabo_id, karabo_da=karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 first_seq = 0 if sequences == [-1] else sequences[0]

 seq_corrected_files = [
    out_folder / f for f in fnmatch.filter(corrected_files, f"*{run}*S{first_seq:05d}*")
 ]

+# TODO: replace with CALCAT value.
+if "1M" in karabo_id:
+    nmods = 2
+elif "4M" in karabo_id:
+    nmods = 8
+else:  # 500K
+    nmods = 1
+
 with DataCollection.from_paths(seq_corrected_files) as corr_dc:
    # Reading CORR data for plotting.
    jf_corr = components.JUNGFRAU(
        corr_dc,
        detector_name=karabo_id,
+        n_modules=nmods,
    ).select_trains(np.s_[:plot_trains])
    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))

 # Shape = [modules, trains, cells, x, y]
 corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...]
 corrected_train = jf_corr_data["data.adc"][:, cell_idx_preview, ...]  # loose the train axis.

 mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...]
 mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...]

 with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*", _use_voview=False) as raw_dc:
    # Reading RAW data for plotting.
    jf_raw = components.JUNGFRAU(
-        raw_dc, detector_name=karabo_id
-        ).select_trains(np.s_[:plot_trains])
+        raw_dc,
+        detector_name=karabo_id,
+        n_modules=nmods,
+    ).select_trains(np.s_[:plot_trains])

 raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...]

 gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...]
 gain_train_cells = (
    jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...]
 )
 step_timer.done_step("Prepared data for plotting")
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview

 %% Cell type:code id: tags:

 ``` python
 print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")

 fig, ax = plt.subplots(figsize=(18, 10))
 raw_mean = raw.mean(skipna=True, axis=1)
 vmin, vmax = np.nanpercentile(raw_mean, [5, 95])
 geom.plot_data_fast(
    raw_mean,
    ax=ax,
    vmin=vmin,
    vmax=vmax,
    colorbar={'shrink': 1, 'pad': 0.01},
 )
 ax.set_title(f'{karabo_id} - Mean RAW', size=18)
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview

 %% Cell type:code id: tags:

 ``` python
 print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")

 fig, ax = plt.subplots(figsize=(18, 10))
 corrected_mean = corrected.mean(skipna=True, axis=1, keep_attrs=True)
 vmin, vmax = np.nanpercentile(corrected_mean, [5, 95])

 mean_plot_kwargs = dict(vmin=vmin, vmax=vmax)

 if strixel_sensor:
    if strixel_sensor == "A1256":
        aspect = 1/3
    else:  # A0123
        aspect = 10

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_mean,
        ax=ax,
        colorbar={'shrink': 1, 'pad': 0.01},
        **mean_plot_kwargs
    )
 else:
    corr = ax.imshow(corrected_mean.squeeze(), aspect=aspect, **mean_plot_kwargs)
    plt.colorbar(corr)

 ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots(figsize=(18, 10))
 corrected_masked = corrected.copy()
 corrected_masked.where(mask != 0, np.nan)
 corrected_masked_mean = corrected_masked.mean(skipna=True, axis=1)
 del corrected_masked

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_masked_mean,
        ax=ax,
        colorbar={'shrink': 1, 'pad': 0.01},
        **mean_plot_kwargs
    )
 else:
    corr = ax.imshow(corrected_masked_mean.squeeze(), aspect=aspect, **mean_plot_kwargs)
    plt.colorbar(corr)

 ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown((f"#### A single image from train {tid}")))

 fig, ax = plt.subplots(figsize=(18, 10))
 vmin, vmax = np.nanpercentile(corrected_train, [5, 95])

 single_plot_kwargs = dict(
    vmin=vmin,
    vmax=vmax,
 )

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_train,
        ax=ax,
        colorbar={"shrink": 1, "pad": 0.01},
        **single_plot_kwargs
    )
 else:
    corr = ax.imshow(corrected_train.squeeze(), aspect=aspect, **single_plot_kwargs)
    plt.colorbar(corr)

 ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis, title):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [
        np.min(edges[1]),
        np.max(edges[1]),
        np.min(edges[0]),
        np.max(edges[0]),
    ]

    im = ax.imshow(
        data[::-1, :],
        extent=extent,
        aspect="auto",
        norm=LogNorm(vmin=1, vmax=np.max(data))
    )
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:markdown id: tags:

 ### Gain Bit Value

 %% Cell type:code id: tags:

 ``` python
 for i, mod in enumerate(karabo_da):
    pdu = da_to_pdu[mod]
    h, ex, ey = np.histogram2d(
        raw[i].values.flatten(),
        gain[i].values.flatten(),
        bins=[100, 4],
        range=[[0, 10000], [0, 4]],
    )
    do_2d_plot(
        h,
        (ex, ey),
        "Signal (ADU)",
        "Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
        f"Module {mod} ({pdu})",
    )
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 for i, mod in enumerate(karabo_da):
    pdu = da_to_pdu[mod]
    fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
    corrected_flatten = corrected[i].values.flatten()
    for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):
        h = ax.hist(
            corrected_flatten,
            bins=1000,
            range=hist_range,
            log=True,
        )
        l = ax.set_xlabel("Signal (keV)")
        l = ax.set_ylabel("Counts")
        _ = ax.set_title(f'Module {mod} ({pdu})')
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview

 %% Cell type:code id: tags:

 ``` python
 display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))

 fig, ax = plt.subplots(figsize=(18, 10))
 gain_max = gain_train_cells.max(skipna=True, axis=(1, 2))

 geom.plot_data_fast(
    gain_max,
    ax=ax,
    colorbar={'shrink': 1, 'pad': 0.01},
 )
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 badpixels = [
        BadPixels.OFFSET_OUT_OF_THRESHOLD,
        BadPixels.NOISE_OUT_OF_THRESHOLD,
        BadPixels.OFFSET_NOISE_EVAL_ERROR,
        BadPixels.NO_DARK_DATA,
        BadPixels.WRONG_GAIN_VALUE,
    ]
 for item in badpixels:
    table.append(
        (item.name, f"{item.value:016b}"))
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f"#### Bad pixels image for train {tid}"))

 fig, ax = plt.subplots(figsize=(18, 10))
 vmin, vmax = (0, sorted([bp.value for bp in badpixels])[-2])
 if not strixel_sensor:
    geom.plot_data_fast(
        np.log2(mask_train),
        ax=ax,
        vmin=vmin, vmax=vmax,
        colorbar={'shrink': 1, 'pad': 0.01},
    )
 else:
    mask = ax.imshow(
        mask_train.squeeze(), vmin=vmin, vmax=vmax, aspect=aspect)
    plt.colorbar(mask)

 plt.show()
 ```