Compare revisions — calibration/pycalibration
Commits on Source (68): 610 additions and 219 deletions
[paths]
source=
src/
*/lib/python*/site-packages/
......@@ -46,12 +46,13 @@ pytest:
script:
- export LANG=C # Hopefully detect anything relying on locale
- python3 -m pip install ".[test]"
- python3 -m pytest --color yes --verbose --cov=cal_tools --cov=xfel_calibrate --cov-report html:htmlcov --cov-report term
coverage: '/TOTAL.*? (\d+(?:\.\d+)?\%)$/'
artifacts:
expose_as: 'Coverage report'
name: htmlcov
paths:
- htmlcov/
cython-editable-install-test:
stage: test
......
......@@ -6,6 +6,7 @@ repos:
rev: 0.13.0
hooks:
- id: nbqa-check-ast
args: ["--nbqa-dont-skip-bad-cells"]
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
......
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
overwrite = False # IGNORED, NEEDED FOR COMPATIBILITY.
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 225 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # list of data aggregator names; default ['-1'] selects all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the image index
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
cal_db_interface = "tcp://max-exfl-cal001:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
cal_db_root = "" # The calibration database root path to access constant files. e.g. accessing constants from the test database /gpfs/exfel/d/cal_tst/caldb_store.
mem_cells = -1 # Number of memory cells used, set to -1 to automatically infer
bias_voltage = -1 # bias voltage, set to -1 to use the value stored in slow data
acq_rate = -1. # the detector acquisition rate, set to -1 to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
mem_cells_db = -1 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels to consider a module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered dark
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium-gain offset-subtracted pixels to high gain
mg_hard_threshold = 1000 # threshold to force medium-gain offset-subtracted pixels from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask a complete ADC
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
rounding_threshold = 0.5 # fractional parts below this threshold are rounded down; 0.5 gives the standard rounding rule
cs_mg_adjust = 7e3 # Value to adjust medium gain when correcting with current source. This is used when `adjust_mg_baseline` is True.
# Correction Booleans
only_offset = False # Apply only the offset correction. If False, offset is still applied by default alongside other enabled corrections; if True, only offset is applied.
# TODO: Remove this boolean parameter once rel_gain_mode replaces it.
rel_gain = False
rel_gain_mode = "off" # Select relative gain correction. Choices [`PC`, `CS`, `off`]. (`PC`: Pulse Capacitor, `CS`: Current Source, `off`: Disable relative gain correction). Default: off.
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask an entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
round_photons = False # Round to absolute number of photons, only use with gain corrections
# Additional processing
count_lit_pixels = False # Count the number of pixels registering photons
spi_hitfinding = False # Find hits using lit-pixel counter
# Optional auxiliary devices
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
require_ppu_trigger = False # Optional protection against running without PPU or without triggering trains.
use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'online' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically
litframe_device_id = '' # Device ID for a lit-frame finder device, empty string for auto-detection
energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction
use_xgm_device = '' # DoocsXGM device ID to obtain the actual photon energy; if empty, the operating-conditions value is used
# Output parameters
recast_image_data = '' # Cast data to a different dtype before saving
compress_fields = ['gain', 'mask'] # Datasets in image group to compress.
# SPI hit-finder parameters
spi_hf_modules = [3, 4, 8, 15] # Use specified modules for SPI hitfinding
spi_hf_mode = "adaptive" # The method to compute threshold for hitscores in SPI hitfinding: `fixed` or `adaptive`
spi_hf_snr = 4.0 # Signal-to-noise ratio for adaptive threshold in SPI hitfinding
spi_hf_min_scores = 100 # The minimum number of events to compute adaptive threshold in SPI hitfinding
spi_hf_fixed_threshold = 0 # The fixed threshold value
spi_hf_hitrate_window_size = 200 # The window size for running average of hitrate in trains
spi_hf_xgm_norm = False # Use XGM pulse energy for hitscore normalization
spi_hf_miss_fraction = 1 # The fraction of misses to select along with hits
spi_hf_miss_fraction_base = "hit" # The base to compute the number of misses to select: the number of hits (`hit`) or misses (`miss`)
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
cmap = "viridis" # matplolib.colormap for almost all heatmap. Other options ['plasma', 'inferno', 'magma', 'cividis', 'jet', ...]
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
max_tasks_per_worker = 1 # number of tasks a correction pool worker process completes before it exits and is replaced with a fresh worker process. Leave as -1 to keep workers alive as long as the pool.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
import itertools
import math
import multiprocessing
import os
import sys
import warnings
from datetime import timedelta
from logging import warning
from pathlib import Path
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import h5py
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import by_id, RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools.restful_config as rest_cfg
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.calcat_interface import (
AGIPD_CalibrationData,
CalCatError,
)
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import agipd_single_module_geometry
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
calcat_creation_time,
latex_warning,
map_modules_from_folder,
module_index_to_qm,
write_constants_fragment,
)
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
step_timer = StepTimer()
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
cs_corr = False
pc_corr = False
if rel_gain_mode.lower() == "off":
# TODO: Remove this part after replacing rel_gain with rel_gain_mode
if rel_gain:
pc_corr = True
elif rel_gain_mode.lower() == "cs":
cs_corr = True
elif rel_gain_mode.lower() == "pc":
pc_corr = True
else:
raise ValueError(
"Unexpected value for `rel_gain_mode`. "
"Please select one of 'PC', 'CS' or 'off'.")
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Only apply further corrections if only_offset is not requested
if not only_offset:
corr_bools["cs_corr"] = cs_corr
corr_bools["pc_corr"] = pc_corr
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
corr_bools["round_photons"] = round_photons
corr_bools["count_lit_pixels"] = count_lit_pixels
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"pc_corr",
"cs_corr",
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if "AGIPD1M" in karabo_id:
nmods = 16
elif "AGIPD500K" in karabo_id:
nmods = 8
else:
nmods = 1
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
mod_indices = modules if nmods > 1 else [0]
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else: # TODO: fix this with the new CALCAT metadata for module indices.
modules = [int(x[-2:]) for x in karabo_da]
mod_indices = modules if nmods > 1 else [0]
print("Process modules:", ', '.join(module_index_to_qm(x) for x in mod_indices))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
```
%% Cell type:code id: tags:
``` python
train_available = False
for m in modules:
try:
# Attempt to select the module. If no trains are available, ValueError might be raised
if len(dc[instrument_src.format(m), 'image.data'].drop_empty_trains().train_ids) > 0:
train_available = True
break # Found a module with available trains.
except ValueError as e:
warning(f"Missing module {m} from data: {e}")
if not train_available:
# Execute this block if no modules with trains were found.
latex_warning("No trains available to correct for selected modules.")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
if use_ppu_device and use_ppu_device in dc.control_sources:
# Obtain trains to process if using a pulse picker device and it's present.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset
train_ids = []
for train_id in start_train_ids:
n_trains = dc[
use_ppu_device, 'trainTrigger.numberOfTrains'
].select_trains(by_id[[train_id]]).ndarray()[0]
train_ids.extend(list(range(train_id, train_id + n_trains)))
if train_ids:
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif require_ppu_trigger:
raise RuntimeError(f'PPU device {use_ppu_device} not triggered but required, aborting!')
else:
print(f'PPU device {use_ppu_device} not triggered, processing all valid trains')
train_ids = None
elif use_ppu_device:
# PPU configured but not present.
if require_ppu_trigger:
raise RuntimeError(f'PPU device {use_ppu_device} required but not found, aborting!')
else:
print(f'PPU device {use_ppu_device} configured but not found, processing all valid trains')
train_ids = None
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# No PPU configured.
print(f'Processing all valid trains')
train_ids = None
```
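%% Cell type:markdown id: tags:
For intuition, a minimal sketch of how the picked trains are reconstructed above: the unique `sequenceStart` values (dropping the first, which belongs to the previous trigger) mark the start of each picked sequence, and each is extended by the corresponding `numberOfTrains`. All values here are synthetic and assume `numberOfTrains == 3` for both triggers.
%% Cell type:code id: tags:
``` python
import numpy as np

# Synthetic sequenceStart records: the device repeats the last value
# until the next trigger, and the first unique value predates this run.
seq_start_demo = np.array([990, 1000, 1000, 1000, 1020, 1020])
starts = [int(s) for s in np.unique(seq_start_demo)[1:]]  # [1000, 1020]
n_trains_demo = 3  # assumed numberOfTrains for both triggers
picked = [t for s in starts for t in range(s, s + n_trains_demo)]
print(picked)  # [1000, 1001, 1002, 1020, 1021, 1022]
```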
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_first_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_first_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}")
if acq_rate == -1.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == -1:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == -1:
for m in modules:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control, module=m)
# Accept non-zero value for a bias voltage from any module.
if bias_voltage != 0.:
break
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: 9.2")
```
%% Cell type:code id: tags:
``` python
if gain_mode: # fixed gain modes are truthy; AgipdGainMode.ADAPTIVE_GAIN == 0
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_finder != 'off':
from extra_redu import make_litframe_finder, LitFrameFinderError
if use_litframe_finder not in ['auto', 'offline', 'online']:
raise ValueError("Unexpected value in 'use_litframe_finder'.")
inst = karabo_id_control[:3]
litfrm = make_litframe_finder(inst, dc, litframe_device_id)
try:
get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
r = get_data[use_litframe_finder]()
cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
print(f"{cell_sel.msg()}\n")
cell_sel.print_report()
if np.count_nonzero(r.nLitFrame.value) == 0: # No lit frames.
latex_warning(
"No lit frames identified using AGIPD LitFrameFinder."
" Offline correction will not be performed.")
sys.exit(0)
except LitFrameFinderError as err:
warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
# cell_sel may not be defined yet if the finder failed; fall back to a plain cell range.
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:code id: tags:
``` python
photon_energy_warn = None
if photon_energy <= 0.0:
if use_xgm_device:
# Try to obtain photon energy from XGM device.
wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']
try:
from scipy.constants import h, c, e
# Read wavelength as a single value and convert to hv.
photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
except ValueError:
photon_energy_warn = 'XGM source available but photon energy varies by more than 1%'
else:
photon_energy_warn = 'Neither explicit photon energy nor XGM device configured'
rounding_threshold_warn = None
if rounding_threshold <= .0 or 1. <= rounding_threshold:
rounding_threshold_warn = 'Rounding threshold is outside the (0, 1) range; using the standard value of 0.5.'
rounding_threshold = 0.5
if round_photons:
if photon_energy_warn:
warning(photon_energy_warn + ', photon rounding disabled!')
round_photons = False
else:
print(f'Photon energy for rounding: {photon_energy:.3f} keV')
if rounding_threshold_warn:
warning(rounding_threshold_warn)
```
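%% Cell type:markdown id: tags:
A quick sanity check of the wavelength-to-energy conversion above, with an illustrative wavelength (not read from data): with `pulseEnergy.wavelengthUsed` in nm, the 1e-6 factor makes `(h * c / e) / (wavelength * 1e-6)` come out in keV.
%% Cell type:code id: tags:
``` python
from scipy.constants import c, e, h

# Hypothetical wavelength close to the 9.2 keV operating point.
wavelength_nm = 0.1348
energy_kev = (h * c / e) / (wavelength_nm * 1e-6)
print(f"{energy_kev:.2f} keV")  # ~9.20 keV
```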
%% Cell type:code id: tags:
``` python
if count_lit_pixels:
if round_photons:
data_units = 'photon'
litpx_threshold = 1.
else:
data_units = 'keV'
if photon_energy_warn:
warning(photon_energy_warn + '. Using 12 keV for the lit-pixel counter threshold')
litpx_threshold = 12.
else:
litpx_threshold = photon_energy
if rounding_threshold_warn:
warning(rounding_threshold_warn)
if not xray_gain:
# convert photon energy to ADU (for AGIPD approx. 1 keV = 7 ADU);
# rounding to photons appears to work on data in ADU as well
litpx_threshold *= 7.
data_units = 'ADU'
litpx_threshold *= rounding_threshold
print(f"Count lit-pixels with signal above {litpx_threshold:.3g} {data_units}")
else:
# dummy value that is not expected to be used
litpx_threshold = 42
```
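%% Cell type:markdown id: tags:
A worked example of the threshold arithmetic above, under assumed defaults (no explicit photon energy, no X-ray gain correction, standard rounding): the 12 keV fallback is converted to ADU and scaled by the rounding threshold.
%% Cell type:code id: tags:
``` python
# Assumed values for illustration only.
photon_energy_kev = 12.0   # fallback when no XGM value is available
adu_per_kev = 7.0          # AGIPD: approx. 1 keV = 7 ADU
rounding = 0.5             # standard rounding threshold

threshold_adu = photon_energy_kev * adu_per_kev * rounding
print(f"Lit-pixel threshold: {threshold_adu:.0f} ADU")  # 42 ADU
```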
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
agipd_corr.photon_energy = photon_energy
agipd_corr.rounding_threshold = rounding_threshold
agipd_corr.cs_mg_adjust = cs_mg_adjust
agipd_corr.litpx_threshold = litpx_threshold
agipd_corr.compress_fields = compress_fields
if recast_image_data:
agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
```
%% Cell type:markdown id: tags:
## Retrieving constants
%% Cell type:code id: tags:
``` python
def get_constants_and_update_metadata(cal_data, main_metadata, constants):
try:
metadata = cal_data.metadata(constants)
for key, value in metadata.items():
main_metadata.setdefault(key, {}).update(value)
except CalCatError as e: # TODO: replace when API errors are improved.
warning(f"CalCatError: {e}")
```
%% Cell type:code id: tags:
``` python
step_timer.start()
# Instantiate agipd_cal with the read operating conditions.
agipd_cal = AGIPD_CalibrationData(
detector_name=karabo_id,
modules=karabo_da,
sensor_bias_voltage=bias_voltage,
memory_cells=mem_cells,
acquisition_rate=acq_rate,
integration_time=integration_time,
source_energy=9.2,
gain_mode=gain_mode,
gain_setting=gain_setting,
event_at=creation_time,
client=rest_cfg.calibration_client(),
caldb_root=Path(cal_db_root) if cal_db_root else None,
)
# Prepare lists of expected calibrations
dark_constants = ["Offset", "Noise", "BadPixelsDark"]
if not gain_mode: # Adaptive gain
dark_constants.append("ThresholdsDark")
agipd_metadata = agipd_cal.metadata(dark_constants)
agipd_cal.gain_mode = None # gain_mode is not used for gain constants
pc_constants, ff_constants, cs_constants = [], [], []
if agipd_corr.corr_bools.get('xray_corr'):
ff_constants = list(agipd_cal.illuminated_calibrations)
get_constants_and_update_metadata(
agipd_cal, agipd_metadata, ff_constants)
if any(agipd_corr.relgain_bools):
if cs_corr:
# Integration time is not used with CS
agipd_cal.integration_time = None
cs_constants = ["SlopesCS", "BadPixelsCS"]
get_constants_and_update_metadata(
agipd_cal, agipd_metadata, cs_constants)
else: # rel_gain_mode == "pc" or "off"
pc_constants = ["SlopesPC", "BadPixelsPC"]
get_constants_and_update_metadata(
agipd_cal, agipd_metadata, pc_constants)
step_timer.done_step("Constants were retrieved in")
relgain_alias = "CS" if cs_corr else "PC"
print("Preparing constants ("
f"FF: {agipd_corr.corr_bools.get('xray_corr', False)}, "
f"{relgain_alias}: {any(agipd_corr.relgain_bools)}, "
f"BLC: {any(agipd_corr.blc_bools)})")
# Display retrieved calibration constants timestamps
agipd_cal.display_markdown_retrieved_constants(metadata=agipd_metadata)
```
%% Cell type:code id: tags:
``` python
# Skip DAs not available in CALCAT
def filter_available_das(das):
available_das = [da for da in das if da in agipd_metadata.keys()]
if missing_das := set(das) - set(available_das):
warning(
f"Skipping correcting {missing_das} as they are "
"not mapped to this detector in CALCAT.")
return available_das
karabo_da = filter_available_das(karabo_da)
# Validate constants availability and exclude modules with no offsets.
for da, calibrations in agipd_metadata.items():
mod = modules[karabo_da.index(da)]
# Constants to error out for when missing.
error_missing_constants = {"Offset"}
if not gain_mode:
error_missing_constants |= {"ThresholdsDark"}
error_missing_constants -= set(calibrations)
if error_missing_constants:
warning(f"Offset constant is not available to correct {da}.")
# Remove module from files to process.
karabo_da.remove(da)
modules.remove(mod)
warn_missing_constants = set(dark_constants + pc_constants + ff_constants + cs_constants)
warn_missing_constants -= error_missing_constants
warn_missing_constants -= set(calibrations)
if warn_missing_constants:
warning(f"Constants {warn_missing_constants} were not retrieved for {da}.")
if not karabo_da: # Offsets are missing for all modules.
raise Exception("Could not find offset constants for any modules, will not correct data.")
```
%% Cell type:code id: tags:
``` python
# Record constant details in YAML metadata
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=agipd_metadata,
caldb_root=agipd_cal.caldb_root)
```
%% Cell type:code id: tags:
``` python
# Set everything up filewise
# This cell needs to be after getting and validating CCVs and
# after modifying `karabo_da` in case of missing required constants.
mapped_files, _, total_sequences, _, _ = map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences, key_name="da" if nmods < 16 else "qm"
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
# Load calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
def load_constants(da, module):
"""
Initialize constants data from previously retrieved metadata.
Args:
da (str): Data Aggregator (Karabo DA)
module (int): Module index
Returns:
(int, dict, str): Module index, {constant name: creation time}, Karabo DA
"""
const_data = dict()
variant = dict()
for cname, mdata in agipd_metadata[da].items():
dataset = mdata["dataset"]
with h5py.File(agipd_cal.caldb_root / mdata["path"], "r") as cf: # noqa
const_data[cname] = np.copy(cf[f"{dataset}/data"])
variant[cname] = cf[dataset].attrs["variant"] if cf[dataset].attrs.keys() else 0 # noqa
agipd_corr.init_constants(const_data, module, variant)
step_timer.start()
with multiprocessing.Pool(processes=len(modules)) as pool:
pool.starmap(load_constants, zip(karabo_da, modules))
step_timer.done_step(f'Constants were loaded in ')
```
%% Cell type:code id: tags:
``` python
# Store timestamps for Offset, SlopesPC/SlopesCS, and SlopesFF
# in YAML file for time-summary table.
timestamps = {}
for mod, mod_mdata in agipd_metadata.items():
modno = int(mod[-2:])
module_timestamps = {}
# Store a few timestamps if they exist
# Add NA to keep the array structure
for key in ['Offset', f'Slopes{relgain_alias}', 'SlopesFF']:
if key in mod_mdata:
module_timestamps[key] = mod_mdata[key]["begin_validity_at"]
else:
module_timestamps[key] = "NA"
mod_name = mod if nmods < 16 else module_index_to_qm(modno)
timestamps[mod_name] = module_timestamps
seq = sequences[0] if sequences else 0
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256 # one sequence file contains up to 256 trains
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
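%% Cell type:markdown id: tags:
For illustration, `batches` (defined above) groups the file list into chunks of `n_cores_files`, with a final shorter batch if the list length is not a multiple of the batch size:
%% Cell type:code id: tags:
``` python
# Example with dummy file names and batch size 2.
print(list(batches(["f0", "f1", "f2", "f3", "f4"], 2)))
# [['f0', 'f1'], ['f2', 'f3'], ['f4']]
```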
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
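%% Cell type:markdown id: tags:
Note that `imagewise_chunks` balances the chunk boundaries instead of cutting fixed `chunk_size` slices; a standalone sketch of the same arithmetic, e.g. 2500 images with `chunk_size = 1000` give three near-equal chunks rather than 1000/1000/500:
%% Cell type:code id: tags:
``` python
import math

# Standalone illustration of the chunk-boundary arithmetic above.
n_img, size = 2500, 1000
n_chunks = math.ceil(n_img / size)  # 3
print([(i * n_img // n_chunks, (i + 1) * n_img // n_chunks) for i in range(n_chunks)])
# [(0, 833), (833, 1666), (1666, 2500)]
```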
%% Cell type:code id: tags:
``` python
step_timer.start()
all_imgs_counts = []
if max_tasks_per_worker == -1:
max_tasks_per_worker = None
with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
step_timer.done_step(f'Loading data from files')
all_imgs_counts += img_counts
if not np.any(img_counts):
# Skip any further processing and output if there are no images to
# correct in this file.
continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# If common mode correction is enabled,
# cell selection is only activated after the common mode correction.
# Perform cross-file correction in parallel over ASICs
image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
pool.starmap(agipd_corr.cm_correction, itertools.product(
image_files_idx, range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise correction"
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Perform additional processing
if count_lit_pixels:
pool.starmap(agipd_corr.litpixel_counter, imagewise_chunks(img_counts))
step_timer.done_step("Lit-pixel counting")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print("Skipping plots as configured.")
sys.exit(0)
elif not np.any(all_imgs_counts):
latex_warning(f"All sequence files contain no data for correction.")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title=""):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = np.array(
[np.nanmin(edges[1]), np.nanmax(edges[1]),
np.nanmin(edges[0]), np.nanmax(edges[0])])
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None, mod_starts_at=0):
"""Load single train for all module
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
try:
run_data = RunDirectory(data_folder, include)
except FileNotFoundError:
warning(f'No corrected files for {include}. Skipping plots.')
sys.exit(0)
if tid is not None:
tid, data = run_data.select(
f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
else:
# Use the first train that has data for all available modules.
tid, data = next(run_data.select(
f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules,
starts_at=mod_starts_at,
)
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if "AGIPD500K" in karabo_id:
geom = AGIPD_500K2GGeometry.from_origin()
elif "AGIPD1M" in karabo_id:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
else: # single module AGIPD detector
geom = agipd_single_module_geometry()
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
mod_starts_at = 0 if nmods > 1 else modules[0] # TODO: use CALCAT metadata for the detector.
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods, mod_starts_at=mod_starts_at)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0, mod_starts_at=mod_starts_at)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:code id: tags:
``` python
# As part of data reduction efforts, the DAQ now has an option to discard AGIPD gain data
# when it is known that all data is in the same gain stage. In such cases, the gain data
# will be set to zeros. Consequently, the signal vs. analog gain 2D histogram can be skipped.
gain = raw[:, 1, ...]
if gain.max() > 0:
signal = raw[:, 0, ...]
display(Markdown("### Signal vs. Analogue Gain"))
hist, bins_x, bins_y = calgs.histogram2d(
signal.flatten().astype(np.float32),
gain.flatten().astype(np.float32),
bins=(100, 100),
range=[
np.percentile(signal, [0.02, 99.8]),
np.percentile(gain, [0.02, 99.8]),
],
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows signal vs. digitized gain.
%% Cell type:code id: tags:
``` python
vmin, vmax = np.nanmin(corrected), np.nanmax(corrected)
hist, bins_x, bins_y = calgs.histogram2d(
corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[
# The range boundaries are decided by detector experts.
[max(vmin, -50), min(vmax, 8192)],
[0, 3]
],
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.nanmin(pulseId[pulseId>=0]), np.nanmax(pulseId[pulseId>=0])]
def clamp(value, min_value, max_value):
return max(min_value, min(value, max_value))
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
vmin, vmax = np.nanmin(mean_data), np.nanmax(mean_data)
hist, bins_x, bins_y = calgs.histogram2d(
mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[clamp(vmin, -50, -0.2), min(vmax, 1000)], pulse_range],
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID")
if vmax > 1000: # a zoom out plot.
hist, bins_x, bins_y = calgs.histogram2d(
mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[clamp(vmin, -50, -0.2), min(vmax, 20000)], pulse_range]
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID (Extended View)")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3), dtype=float)
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(np.nanpercentile(corrected_ave/10**6, [2, 98]))
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = np.percentile(data, [5, 95])
ax = geom.plot_data_fast(data, ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
pass
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = np.percentile(raw[cell_idx_preview, 0, ...], [5, 95])
ax = geom.plot_data_fast(
raw[cell_idx_preview, 0, ...], ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
pass
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmax = np.nanpercentile(data, 99.8)
ax = geom.plot_data_fast(data, ax=ax, vmin=max(-50, np.nanmin(data)), vmax=vmax, cmap=cmap)
pass
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmax = np.nanpercentile(corrected[cell_idx_preview], 99.8)
ax = geom.plot_data_fast(
corrected[cell_idx_preview],
ax=ax,
vmin=max(-50, np.nanmin(corrected[cell_idx_preview])),
vmax=vmax,
cmap=cmap,
)
pass
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
plt.title(f'Log-scaled histogram for corrected data for cell {cell_idx_preview}')
pass
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax = 50000
nbins = int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
plt.title(f'Overlaid Histograms for corrected data for multiple gains')
pass
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(
np.max(gains, axis=0), ax=ax,
cmap=cmap, vmin=-0.3, vmax=2.3) # Extend cmap for wrong gain values.
pass
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
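%% Cell type:markdown id: tags:
Since the mask is a bit field, individual causes can be tested with a bitwise AND. A minimal sketch on a synthetic mask (the real `mask` array loaded above works the same way):
%% Cell type:code id: tags:
``` python
import numpy as np

from cal_tools.enums import BadPixels

# Synthetic 32-bit mask, purely for illustration.
mask_demo = np.zeros((2, 2), dtype=np.uint32)
mask_demo[0, 0] = BadPixels.NO_DARK_DATA.value
mask_demo[1, 1] = BadPixels.NO_DARK_DATA.value | 1  # combined with another flag

# A pixel is flagged for a given cause if the corresponding bit is set.
print((mask_demo & BadPixels.NO_DARK_DATA.value) != 0)
```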
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(
np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap=cmap)
pass
```
%% Cell type:code id: tags:
``` python
if round_photons:
display(Markdown('### Photonization histograms ###'))
x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)
fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
ax.set_yscale('log')
ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
ax.set_xlim(x_postround[0], x_postround[-1]+1)
ax.set_xlabel('Photon energy / keV')
ax.set_ylabel('Intensity')
ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')
phx = ax.twiny()
phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
phx.set_xticks(x_photons)
phx.set_xlabel('# Photons')
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(
np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(
np.mean(cm>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
pass
```
......
%% Cell type:markdown id: tags:
# Characterization of dark and flat field for Dynamic Flat Field correction
Author: Egor Sobolev
Computation of dark offsets and flat-field principal components
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu' # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run_high = 1 # run number in which dark data was recorded, required
run_low = 2 # run number in which flat-field data was recorded, required
operation_mode = "PCA_DynamicFF" # Detector operation mode, optional (defaults to "PCA_DynamicFF")
# Data files parameters.
karabo_da = ['-1'] # data aggregators
karabo_id = "SPB_MIC_HPVX2" # karabo prefix of Shimadzu HPV-X2 devices
# Database access parameters.
cal_db_interface = "tcp://max-exfl-cal001:8021" # Unused, calibration DB interface to use
cal_db_timeout = 30000 # Unused, calibration DB timeout
db_output = False # if True, the notebook sends dark constants to the calibration database
local_output = True # if True, the notebook saves dark constants locally
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HH:MM:SS.00 e.g. 2019-07-04 11:02:41.00
# Calibration constants parameters
frame_range = [4] # range list [start, end, step] of frame indices within a train to use for characterization.
n_components = 50 # Number of principal components of flat-field to compute (default: 50)
```
%% Cell type:code id: tags:
``` python
import datetime
import os
import warnings
from logging import warning
from shutil import copyfile
from tempfile import NamedTemporaryFile
warnings.filterwarnings('ignore')
import time
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from extra_data import RunDirectory
%matplotlib inline
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
run_prop_seq_from_path,
calcat_creation_time,
)
from cal_tools.restful_config import calibration_client, extra_calibration_client
from cal_tools.shimadzu import ShimadzuHPVX2
from cal_tools.constants import CCVAlreadyInjectedError, write_ccv, inject_ccv
import dynflatfield as dffc
from dynflatfield.draw import plot_images, plot_camera_image
```
%% Cell type:code id: tags:
``` python
extra_calibration_client() # Configure CalibrationData.
cc = calibration_client()
pdus = cc.get_all_phy_det_units_from_detector({
"detector_identifier": karabo_id,
"pdu_snapshot_at": calcat_creation_time(
in_folder, min(run_low, run_high), creation_time),
})
if not pdus["success"]:
raise ValueError("Failed to retrieve PDUs")
detector_info = pdus['data'][0]['detector']
detector = ShimadzuHPVX2(detector_info["source_name_pattern"])
print(f"Instrument {detector.instrument}")
print(f"Detector in use is {karabo_id}")
modules = {}
for pdu_no, pdu in enumerate(pdus["data"]):
db_module = pdu["physical_name"]
module = pdu["module_number"]
da = pdu["karabo_da"]
if karabo_da[0] != "-1" and da not in karabo_da:
continue
instrument_source_name = detector.instrument_source(module)
print('-', da, db_module, module, instrument_source_name)
modules[da] = dict(
db_module=db_module,
module=module,
raw_source_name=instrument_source_name,
pdu_no=pdu_no,
)
constants = {}
# make a frame slice
frame_range += [None] * (3 - len(frame_range))
frame_slice = slice(*frame_range)
step_timer = StepTimer()
```
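%% Cell type:markdown id: tags:
The padding above turns a short `frame_range` list into a full `[start, end, step]` slice; a one-element list therefore selects frames from that index to the end of the train:
%% Cell type:code id: tags:
``` python
# Standalone illustration of the frame-slice construction.
fr = [4]
fr += [None] * (3 - len(fr))
print(slice(*fr))                  # slice(4, None, None)
print(list(range(8))[slice(*fr)])  # [4, 5, 6, 7]
```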
%% Cell type:markdown id: tags:
# Offset map
%% Cell type:code id: tags:
``` python
dark_run = run_high
dark_creation_time = calcat_creation_time(in_folder, dark_run, creation_time)
print(f"Using {dark_creation_time} as creation time of Offset constant.")
for da, meta in modules.items():
source_name = detector.instrument_source(meta["module"])
image_key = detector.image_key
display(Markdown(f"## {source_name}"))
# read
step_timer.start()
file_da, _, _ = da.partition('/')
dark_dc = RunDirectory(f"{in_folder}/r{dark_run:04d}",
include=f"RAW-R{dark_run:04d}-{file_da}-S*.h5")
if source_name not in dark_dc.all_sources:
raise ValueError(f"Could not find source {source_name} for module {da} in dark data")
dark_dc = dark_dc.select([(source_name, image_key)])
conditions = detector.conditions(dark_dc, meta["module"])
key_data = dark_dc[source_name, image_key]
images_dark = key_data.ndarray(roi=frame_slice)
ntrain, npulse, ny, nx = images_dark.shape
print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse}/{key_data.shape[1]})")
print(f"Image size: {ny} x {nx} px")
step_timer.done_step("Read dark images")
# process
step_timer.start()
dark = dffc.process_dark(images_dark) # Amounts to a per-pixel mean right now.
# put results in the dict
module_constants = constants.setdefault(meta["db_module"], {})
module_constants["Offset"] = dict(
conditions=conditions, data=dark, pdu_no=meta["pdu_no"],
creation_time=dark_creation_time, dims=['ss', 'fs']
)
step_timer.done_step("Process dark images")
# draw plots
step_timer.start()
plot_camera_image(dark)
plt.show()
step_timer.done_step("Draw offsets")
```
%% Cell type:markdown id: tags:
# Flat-field PCA decomposition
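%% Cell type:markdown id: tags:
For intuition, a minimal sketch of a PCA decomposition of dark-subtracted flat-field frames via SVD. This is an assumption-laden illustration on synthetic data, not the `dffc.process_flat` implementation; the helper name and shapes are hypothetical.
%% Cell type:code id: tags:
``` python
import numpy as np


def pca_flat_sketch(frames, dark, n_comp):
    """Mean flat plus leading principal components of dark-subtracted frames."""
    data = (frames - dark).reshape(len(frames), -1)
    mean_flat = data.mean(axis=0)
    _, s, vt = np.linalg.svd(data - mean_flat, full_matrices=False)
    explained = (s ** 2) / np.sum(s ** 2)
    ny, nx = frames.shape[-2:]
    return (mean_flat.reshape(ny, nx),
            vt[:n_comp].reshape(n_comp, ny, nx),
            explained[:n_comp])


rng = np.random.default_rng(0)
frames_demo = rng.normal(100.0, 5.0, (32, 8, 8))
mean_flat, comps, evr = pca_flat_sketch(frames_demo, np.zeros((8, 8)), 3)
print(mean_flat.shape, comps.shape, evr.round(3))
```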
%% Cell type:code id: tags:
``` python
flat_run = run_low
flat_creation_time = calcat_creation_time(in_folder, flat_run, creation_time)
print(f"Using {flat_creation_time} as creation time of DynamicFF constant.")
for da, meta in modules.items():
source_name = detector.instrument_source(meta["module"])
image_key = detector.image_key
display(Markdown(f"## {source_name}"))
# read
step_timer.start()
file_da, _, _ = da.partition('/')
flat_dc = RunDirectory(f"{in_folder}/r{flat_run:04d}",
include=f"RAW-R{flat_run:04d}-{file_da}-S*.h5")
if source_name not in flat_dc.all_sources:
raise ValueError(f"Could not find source {source_name} for module {da} in flatfield data")
flat_dc = flat_dc.select([(source_name, image_key)])
conditions = detector.conditions(flat_dc, meta["module"])
dark = constants[meta["db_module"]]["Offset"]["data"]
dark_conditions = constants[meta["db_module"]]["Offset"]["conditions"]
if conditions != dark_conditions:
raise ValueError(f"The conditions for flat-field run {conditions}) do not match "
f"the dark run conditions ({dark_conditions}). Skip flat-field characterization.")
key_data = flat_dc[source_name][image_key]
images_flat = key_data.ndarray(roi=frame_slice)
ntrain, npulse, ny, nx = images_flat.shape
print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse}/{key_data.shape[1]})")
print(f"Image size: {ny} x {nx} px")
step_timer.done_step("Read flat-field images")
# process
step_timer.start()
flat, components, explained_variance_ratio = dffc.process_flat(
images_flat, dark, n_components)
flat_data = np.concatenate([flat[None, ...], components])
# put results in the dict
conditions = detector.conditions(flat_dc, meta["module"])
module_constants = constants.setdefault(meta["db_module"], {})
module_constants["DynamicFF"] = dict(
conditions=conditions, data=flat_data, pdu_no=meta["pdu_no"],
creation_time=flat_creation_time, dims=['component', 'ss', 'fs']
)
step_timer.done_step("Process flat-field images")
# draw plots
step_timer.start()
display(Markdown("### Average flat-field"))
plot_camera_image(flat)
plt.show()
display(Markdown("### Explained variance ratio"))
fig, ax = plt.subplots(1, 1, figsize=(10,4), tight_layout=True)
ax.semilogy(explained_variance_ratio, 'o')
ax.set_xticks(np.arange(len(explained_variance_ratio)))
ax.set_xlabel("Component no.")
ax.set_ylabel("Variance fraction")
plt.show()
display(Markdown("### The first principal components (up to 20)"))
plot_images(components[:20], figsize=(13, 8))
plt.show()
step_timer.done_step("Draw flat-field")
```
%% Cell type:markdown id: tags:
## Calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
_, proposal, _ = run_prop_seq_from_path(in_folder)
# Output Folder Creation:
if local_output:
os.makedirs(out_folder, exist_ok=True)
for db_module, module_constants in constants.items():
for constant_name, constant in module_constants.items():
conditions = constant["conditions"]
pdu = pdus["data"][constant["pdu_no"]]
with NamedTemporaryFile() as tempf:
ccv_root = write_ccv(
tempf.name,
pdu['physical_name'], pdu['uuid'], pdu['detector_type']['name'],
constant_name, conditions, constant['creation_time'],
proposal, [dark_run, flat_run],
constant["data"], constant['dims'])
if db_output:
try:
inject_ccv(tempf.name, ccv_root, metadata_folder)
except CCVAlreadyInjectedError:
pass
if local_output:
ofile = f"{out_folder}/const_{constant_name}_{db_module}.h5"
if os.path.isfile(ofile):
print(f'File {ofile} already exists and will be overwritten')
copyfile(tempf.name, ofile)
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......
%% Cell type:markdown id: tags:
# Dynamic Flat-field Offline Correction
Author: Egor Sobolev
Offline dynamic flat-field correction
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder ="/gpfs/exfel/exp/SPB/202430/p900425/scratch/proc/r0003" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 3 # which run to read data from, required
# Data files parameters.
karabo_da = ['-1'] # data aggregators
karabo_id = "SPB_MIC_HPVX2" # karabo prefix of Shimadzu HPV-X2 devices
# Database access parameters.
cal_db_interface = "tcp://max-exfl-cal001:8021" # Unused, calibration DB interface to use
cal_db_timeout = 30000 # Unused, calibration DB timeout
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HH:MM:SS.00 e.g. 2019-07-04 11:02:41.00
# Correction parameters
n_components = 20 # number of principal components of flat-field to use in correction
downsample_factors = [1, 1] # list of downsample factors for each image dimension (y, x)
num_proc = 32 # number of processes running correction in parallel
```
%% Cell type:code id: tags:
``` python
import os
import sys
import h5py
import warnings
from logging import warning
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from datetime import datetime
from extra_data import RunDirectory, by_id
%matplotlib inline
from cal_tools.step_timing import StepTimer
from cal_tools.files import sequence_trains, DataFile
from cal_tools.tools import calcat_creation_time
from cal_tools.restful_config import calibration_client, extra_calibration_client
from cal_tools.calcat_interface2 import CalibrationData
from cal_tools.shimadzu import ShimadzuHPVX2
from dynflatfield import (
DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,
FileDynamicFlatFieldProcessor
)
from dynflatfield.draw import plot_images, plot_camera_image
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time is {creation_time}")
extra_calibration_client() # Configure CalibrationData API.
cc = calibration_client()
pdus = cc.get_all_phy_det_units_from_detector({
"detector_identifier": karabo_id,
"pdu_snapshot_at": creation_time,
})
if not pdus["success"]:
raise ValueError("Failed to retrieve PDUs")
detector_info = pdus['data'][0]['detector']
detector = ShimadzuHPVX2(detector_info["source_name_pattern"])
index_group = detector.image_index_group
image_key = detector.image_key
print(f"Instrument {detector.instrument}")
print(f"Detector in use is {karabo_id}")
modules = {}
for pdu in pdus["data"]:
db_module = pdu["physical_name"]
module = pdu["module_number"]
da = pdu["karabo_da"]
if karabo_da[0] != "-1" and da not in karabo_da:
continue
instrument_source_name = detector.instrument_source(module)
corrected_source_name = detector.corrected_source(module)
print('-', da, db_module, module, instrument_source_name)
modules[da] = dict(
db_module=db_module,
module=module,
raw_source_name=instrument_source_name,
corrected_source_name=corrected_source_name,
)
step_timer = StepTimer()
```
%% Cell type:markdown id: tags:
# Calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
dc = RunDirectory(f"{in_folder}/r{run:04d}")
conditions = detector.conditions(dc)
caldata = CalibrationData.from_condition(
conditions, 'SPB_MIC_HPVX2', event_at=creation_time)
aggregators = {}
corrections = {}
for da in modules:
file_da, _, _ = da.partition('/')
aggregators.setdefault(file_da, []).append(da)
try:
dark = caldata["Offset", da].ndarray()
flat = caldata["DynamicFF", da].ndarray()
components = flat[1:][:n_components]
flat = flat[0]
dffc = DynamicFlatFieldCorrection.from_constants(
dark, flat, components, downsample_factors)
corrections[da] = dffc
except (KeyError, FileNotFoundError):
warning(f"Constants are not found for module {da}. "
"The module will not calibrated")
# missed constants are reported later
pass
step_timer.done_step("Load calibration constants")
```
%% Cell type:markdown id: tags:
# Correction
%% Cell type:code id: tags:
``` python
# Output Folder Creation:
os.makedirs(out_folder, exist_ok=True)
missed_constants = 0
report = {}
for file_da, file_modules in aggregators.items():
dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{file_da}-S*.h5")
# build train IDs
train_ids = set()
process_modules = []
for da in file_modules:
instrument_source = modules[da]["raw_source_name"]
        if instrument_source not in dc.all_sources:
            print(f"Source {instrument_source} for module {da} is missing")
            continue
if da not in corrections:
missed_constants += 1
warning(f"Constants are not found for module {da}. "
"The module will not calibrated")
continue
keydata = dc[instrument_source][image_key].drop_empty_trains()
train_ids.update(keydata.train_ids)
process_modules.append(da)
if not process_modules:
continue
train_ids = np.array(sorted(train_ids))
ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)
# correct and write sequence files
seq_report = {}
for seq_id, train_mask in sequence_trains(train_ids, 200):
step_timer.start()
print('* sequence', seq_id)
seq_train_ids = train_ids[train_mask]
seq_timestamps = ts[train_mask]
dc_seq = dc.select_trains(by_id[seq_train_ids])
ntrains = len(seq_train_ids)
# create output file
channels = [f"{modules[da]['corrected_source_name']}/{index_group}"
for da in process_modules]
f = DataFile.from_details(out_folder, file_da, run, seq_id)
f.create_metadata(like=dc, instrument_channels=channels)
f.create_index(seq_train_ids, timestamps=seq_timestamps)
# create file structure
file_datasets = {}
for da in process_modules:
instrument_source = modules[da]["raw_source_name"]
keydata = dc_seq[instrument_source][image_key].drop_empty_trains()
count = keydata.data_counts(labelled=False)
i = np.flatnonzero(count)
raw_images = keydata.select_trains(np.s_[i]).ndarray()
# not pulse resolved
shape = keydata.shape
count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)
corrected_source = modules[da]["corrected_source_name"]
src = f.create_instrument_source(corrected_source)
src.create_index(**{index_group: count})
# create key for images
ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)
module_datasets = {image_key: ds_data}
# create keys for image parameters
for key in detector.copy_keys:
keydata = dc_seq[instrument_source][key].drop_empty_trains()
module_datasets[key] = (keydata, src.create_key(
key, shape=keydata.shape, dtype=keydata.dtype))
file_datasets[da] = module_datasets
step_timer.done_step("Create output file")
# correct and write data to file
for da in process_modules:
step_timer.start()
dc_seq = dc.select_trains(by_id[seq_train_ids])
dffc = corrections[da]
instrument_source = modules[da]["raw_source_name"]
proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)
proc = FileDynamicFlatFieldProcessor(dffc, num_proc, instrument_source, image_key)
proc.start_workers()
proc.run(dc_seq)
proc.join_workers()
# not pulse resolved
corrected_images = np.stack(proc.rdr.results, 0)
file_datasets[da][image_key][:] = corrected_images
# copy image parameters
for key in detector.copy_keys:
keydata, ds = file_datasets[da][key]
ds[:] = keydata.ndarray()
rep_cix = np.argmax(np.mean(raw_images[:20], axis=(-1, -2)), axis=1)
rep_rix = rep_cix[0]
da_report = seq_report.setdefault(da, [])
da_report.append((raw_images[0, rep_rix],
corrected_images[range(len(rep_cix)), rep_cix]))
step_timer.done_step("Correct flat-field")
f.close()
report.update(seq_report)
if not report:
if missed_constants:
raise ValueError("Calibration constants are not found for any module")
else:
warning("No data to correct")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
step_timer.start()
for da, da_report in report.items():
if len(da_report) > 0:
raw_images, corrected_images = zip(*da_report)
raw_images = np.stack(raw_images)
corrected_images = np.concatenate(corrected_images, axis=0)
else:
raw_images, corrected_images = da_report
raw_images = raw_images[None, ...]
source = modules[da]["raw_source_name"]
display(Markdown(f"## {source}"))
display(Markdown("### The first raw image"))
plot_camera_image(raw_images[0, 0])
display(Markdown("### The brightest raw image from the first train"))
plot_camera_image(raw_images[0])
plt.show()
display(Markdown("### The first corrected image"))
display(Markdown("### The brightest corrected image from the first train"))
plot_camera_image(corrected_images[0])
plt.show()
min_images = 5
if corrected_images.shape[0] < min_images:
        # Pad corrected_images with NaN frames so that plot_images
        # receives at least `min_images` frames.
corrected_images = np.concatenate(
[
corrected_images,
np.full(
(min_images - corrected_images.shape[0], *corrected_images.shape[1:]),
fill_value=np.nan)])
display(Markdown("### The first corrected images in the trains (up to 20)"))
display(Markdown("### The brightest corrected images by one from a train (up to 20 first trains)"))
plot_images(corrected_images, figsize=(13, 8))
plt.show()
step_timer.done_step("Draw images")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......
%% Cell type:markdown id:49b6577f-96a5-4dd2-bdd9-da661b2c4619 tags:
# Gotthard2 Dark Image Characterization #
Author: European XFEL Detector Group, Version: 1.0
The following processes the dark constant maps (`Offset`, `Noise`, and `BadPixelsDark`) using dark images taken with the Gotthard2 detector (GH2 50um or 25um).
All constants are evaluated per strip and per memory cell. The maps are calculated for each of the three gain stages, which are acquired in 3 separate runs.
The three maps have shape (strips, cells, gains): (1280, 2, 3). They can be injected into the database (`db_output`) and/or stored locally (`local_output`).
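For orientation, a minimal sketch (illustrative names only, not part of this notebook) of how such a map is laid out and indexed:
``` python
import numpy as np

# (strips, memory cells, gain stages), as described above.
offset_map = np.zeros((1280, 2, 3), dtype=np.float32)

# Offset of strip 100 in the even storage cell (0) at high gain (G0):
value = offset_map[100, 0, 0]
# All strips of the odd storage cell (1) at medium gain (G1):
profile = offset_map[:, 1, 1]
```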
%% Cell type:code id:818e24e8 tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202231/p900298/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2/darks" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
run_high = 7 # run number for G0 dark run, required
run_med = 8 # run number for G1 dark run, required
run_low = 9 # run number for G2 dark run, required
# Parameters used to access raw data.
input_source_template = "{karabo_id}/DET/RECEIVER{input_source_affixes}:daqOutput" # data source template used to read INSTRUMENT keys.
ctrl_source_template = "{karabo_id_control}/DET/{control_template}" # template for control source name (filled with karabo_id_control)
karabo_id = "FXE_XAD_G2XES" # karabo prefix of Gotthard-II devices
karabo_da = [""] # data aggregators
input_source_affixes = [""] # The affix to format into the input source template to be able to load the correct module's data.
control_template = "CONTROL" # control template used to read CONTROL keys.
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl-cal001:8020" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
db_output = False # Output constants to the calibration database
local_output = True # Output constants locally
# Conditions used for injected calibration constants.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
# This is used for the summary notebook
sensor_type = "" # Specifies the GH2 sensor type: '50um', '25um', or "" to automatically decide from RUN data.
# Parameters used for calculating calibration constants
bpix_noise_nitrs = 5 # number of maximum iterations to actively try to reach correct number of badpixels of NOISE_OUT_OF_THRESHOLD type. Leave -1 to keep iterating until finding maximum number of badpixels.
# Parameters used during selecting raw data trains.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
max_trains = 1000 # Maximum number of trains to use for processing dark constants. Set to 0 to use all available trains.
badpixel_threshold_sigma = 5. # bad pixels defined by values outside n times this std from median
# Don't delete! myMDC sends this by default.
operation_mode = '' # Detector dark run acquiring operation mode, optional
```
%% Cell type:code id:8085f9aa tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
import pasha as psh
from IPython.display import Markdown, display
from extra_data import RunDirectory
from pathlib import Path
import yaml
from cal_tools.calcat_interface import CalCatApi
from cal_tools.enums import BadPixels
from cal_tools.gotthard2 import gotthard2algs, gotthard2lib
from cal_tools.step_timing import StepTimer
from cal_tools.restful_config import calibration_client
from cal_tools.tools import (
calcat_creation_time,
get_constant_from_db_and_time,
get_report,
raw_data_location_string,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
%matplotlib inline
```
%% Cell type:code id:bc4a9824 tags:
``` python
# Input parameters validation
gotthard2lib.validate_sensor_type(sensor_type)
```
%% Cell type:code id:18fe4379 tags:
``` python
run_nums = [run_high, run_med, run_low]
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
if not karabo_id_control:
karabo_id_control = karabo_id
ctrl_src = ctrl_source_template.format(
karabo_id_control=karabo_id_control,
control_template=control_template,
)
run_nums = gotthard2lib.sort_dark_runs_by_gain(
raw_folder=in_folder,
runs=run_nums,
ctrl_src=ctrl_src,
)
# Read report path to associate it later with injected constants.
report = get_report(metadata_folder)
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run_nums[0], creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id:c176a86f tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run_nums[0]:04d}")
proposal = list(filter(None, str(in_folder).strip('/').split('/')))[-2]
file_loc = raw_data_location_string(proposal, run_nums)
```
%% Cell type:code id:108be688 tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id:ff9149fc tags:
``` python
# Read parameter conditions and validate the values for the three runs.
step_timer.start()
run_dcs_dict = dict()
conditions = {
"bias_voltage": set(),
"exposure_time": set(),
"exposure_period": set(),
"acquisition_rate": set(),
"single_photon": set(),
}
for gain, run in enumerate(run_nums):
run_dc = RunDirectory(in_folder / f"r{run:04d}/")
run_dcs_dict[run] = [gain, run_dc]
# Read slow data.
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
conditions["bias_voltage"].add(
g2ctrl.get_bias_voltage() if bias_voltage == -1 else bias_voltage
)
conditions["exposure_time"].add(
g2ctrl.get_exposure_time() if exposure_time == -1 else exposure_time
)
conditions["exposure_period"].add(
g2ctrl.get_exposure_period() if exposure_period == -1 else exposure_period
)
conditions["single_photon"].add(
g2ctrl.get_single_photon() if single_photon == -1 else single_photon
)
conditions["acquisition_rate"].add(
g2ctrl.get_acquisition_rate() if acquisition_rate == -1 else acquisition_rate
)
for c, v in conditions.items():
assert len(v) == 1, f"{c} value is not the same for the three runs."
bias_voltage = conditions["bias_voltage"].pop()
print("Bias voltage: ", bias_voltage)
exposure_time = conditions["exposure_time"].pop()
print("Exposure time: ", exposure_time)
exposure_period = conditions["exposure_period"].pop()
print("Exposure period: ", exposure_period)
single_photon = conditions["single_photon"].pop()
print("Single photon: ", single_photon)
acquisition_rate = conditions["acquisition_rate"].pop()
print("Acquisition rate: ", acquisition_rate)
if not sensor_type:
sensor_type = g2ctrl.get_sensor_type()
print(f"Processing {sensor_type} Gotthard2.")
```
%% Cell type:code id:f64bc150-cfcd-4f98-83f9-a982fdacedd7 tags:
``` python
calcat = CalCatApi(client=calibration_client())
detector_id = calcat.detector(karabo_id)['id']
pdus_by_da = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
# Module index is stored in the da_to_pdu dict to be used
# later in selecting the correct data source.
# TODO: This can be improved later by getting the module index and even data source
# when we start storing in the CALCAT.
da_to_pdu = dict()
for i, (da, p) in enumerate(sorted(pdus_by_da.items())):
da_to_pdu[da] = p['physical_name']
calcat_das = list(da_to_pdu.keys())
if karabo_da != [""]:
# Filter DA connected to detector in CALCAT
karabo_da = [da for da in karabo_da if da in calcat_das]
else:
karabo_da = calcat_das
print(f"Processing {karabo_da}")
da_to_source = gotthard2lib.map_da_to_source(
run_dc,
calcat_das,
input_source_template,
karabo_id,
input_source_affixes,
)
```
%% Cell type:code id:ac9c5dc3-bc66-4e7e-b6a1-360259be535c tags:
``` python
def specify_trains_to_process(
img_key_data: "extra_data.KeyData",
run_num: int,
src: str,
):
"""Specify total number of trains to process.
Based on given min_trains and max_trains, if given.
Print number of trains to process and number of empty trains.
Raise ValueError if specified trains are less than min_trains.
"""
# Specifies total number of trains to process.
n_trains = img_key_data.shape[0]
all_trains = len(img_key_data.train_ids)
print(
f"{src} for run {run} has {all_trains - n_trains} "
f"trains with empty frames out of {all_trains} trains"
)
if n_trains < min_trains:
raise ValueError(
f"Less than {min_trains} trains are available in RAW data."
" Not enough data to process darks."
)
if max_trains > 0:
n_trains = min(n_trains, max_trains)
print(f"Processing {n_trains} trains.")
return n_trains
```
%% Cell type:code id:3c59c11d tags:
``` python
# set the operating condition
condition = Conditions.Dark.Gotthard2(
bias_voltage=bias_voltage,
exposure_time=exposure_time,
exposure_period=exposure_period,
acquisition_rate=acquisition_rate,
single_photon=single_photon,
)
```
%% Cell type:code id:e2eb2fc0-df9c-4887-9691-f81474f8c131 tags:
``` python
def convert_train(wid, index, tid, d):
"""Convert a Gotthard2 train from 12bit to 10bit."""
gotthard2algs.convert_to_10bit(
d[src]["data.adc"], lut, data_10bit[index, ...]
)
```
%% Cell type:code id:4e8ffeae tags:
``` python
# Calculate noise and offset per pixel and global average, std and median
noise_map = dict()
offset_map = dict()
badpixels_map = dict()
context = psh.ProcessContext(num_workers=25)
empty_lut = (np.arange(2 ** 12).astype(np.float64) * 2 ** 10 / 2 ** 12).astype(
np.uint16
)
empty_lut = np.stack(1280 * [np.stack([empty_lut] * 2)], axis=0)
for mod in karabo_da:
# For 50um idx always 0. For 25um pick the right data source
# from list of 2 sources.
src = da_to_source[mod]
# Retrieve LUT constant
lut, time = get_constant_from_db_and_time(
constant=Constants.Gotthard2.LUT(),
condition=condition,
empty_constant=empty_lut,
karabo_id=karabo_id,
karabo_da=mod,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
print(f"Retrieved LUT constant with creation-time {time}")
cshape = (1280, 2, 3)
offset_map[mod] = context.alloc(shape=cshape, dtype=np.float32)
noise_map[mod] = context.alloc(like=offset_map[mod])
badpixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32)
for run_num, [gain, run_dc] in run_dcs_dict.items():
step_timer.start()
n_trains = specify_trains_to_process(run_dc[src, "data.adc"], run_num, src)
# Select requested number of trains to process.
dc = run_dc.select(src, require_all=True).select_trains(
np.s_[:n_trains]
)
step_timer.done_step("preparing raw data")
step_timer.start()
# Convert 12bit data to 10bit
data_10bit = context.alloc(
shape=dc[src, "data.adc"].shape, dtype=np.float32
)
context.map(convert_train, dc)
step_timer.done_step("convert to 10bit")
step_timer.start()
# The first ~20 frames of each train are excluded.
# The electronics needs some time to reach stable operational conditions
# at the beginning of each acquisition cycle,
# hence the first ~20 images have lower quality and should not be used.
# The rough number of 20 is correct at 4.5 MHz acquisition rate,
# 5 should be enough at 1.1 MHz. Sticking with 20 excluded frames, as there isn't
# much of expected difference.
# Split even and odd data to calculate the two storage cell constants.
# Detector always operates in burst mode.
even_data = data_10bit[:, 20::2, :]
odd_data = data_10bit[:, 21::2, :]
def offset_noise_cell(wid, index, d):
offset_map[mod][:, index, gain] = np.mean(d, axis=(0, 1))
noise_map[mod][:, index, gain] = np.std(d, axis=(0, 1))
# Calculate Offset and Noise Maps.
context.map(offset_noise_cell, (even_data, odd_data))
# Split even and odd gain data.
data_gain = dc[src, "data.gain"].ndarray()
even_gain = data_gain[:, 20::2, :]
odd_gain = data_gain[:, 21::2, :]
raw_g = 3 if gain == 2 else gain
def badpixels_cell(wid, index, g):
"""Check if there are wrong bad gain values.
Indicate pixels with wrong gain value across all trains for each cell."""
badpixels_map[mod][
np.mean(g, axis=(0, 1)) != raw_g, index, :
] |= BadPixels.WRONG_GAIN_VALUE.value
context.map(badpixels_cell, (even_gain, odd_gain))
step_timer.done_step("Processing darks")
```
%% Cell type:code id:8e410ca2 tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id:3fc17e05-17ab-4ac4-9e79-c95399278bb9 tags:
``` python
def print_bp_entry(bp):
print(f"{bp.name:<30s} {bp.value:032b} -> {int(bp.value)}")
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
print_bp_entry(BadPixels.WRONG_GAIN_VALUE)
def eval_bpidx(d):
mdn = np.nanmedian(d, axis=(0))[None, :, :]
std = np.nanstd(d, axis=(0))[None, :, :]
idx = (d > badpixel_threshold_sigma * std + mdn) | (
d < (-badpixel_threshold_sigma) * std + mdn
)
return idx
```
%% Cell type:code id:40c34cc5-fe93-4b83-bf39-f465f37c40b4 tags:
``` python
step_timer.start()
g_name = ["G0", "G1", "G2"]
for mod in karabo_da:
pdu = da_to_pdu[mod]
display(Markdown(f"### Badpixels for module {mod}:"))
badpixels_map[mod][
~np.isfinite(offset_map[mod])
] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Iteratively identify noisy pixels (OFFSET_NOISE_EVAL_ERROR & NOISE_OUT_OF_THRESHOLD)
# Due to the low number of strips (1280), outliers can significantly impact
# the badpixel calculation. We use multiple iterations to gradually refine
# the sigma threshold, as a single fixed threshold may not be suitable for
# different Gotthard2 detectors.
last_nbpix = 0
nmap = noise_map[mod].copy()
for itr in range(bpix_noise_nitrs):
badpixels_map[mod][
~np.isfinite(nmap)
] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
badpixels_map[mod][
eval_bpidx(nmap)
] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
nmap[badpixels_map[mod] > 0] = np.nan
        recent_nbpix = np.count_nonzero(badpixels_map[mod])
        if last_nbpix == recent_nbpix:
            # stop iterating if no more badpixels difference
            break
        last_nbpix = recent_nbpix
if not local_output:
for cell in [0, 1]:
fig, ax = plt.subplots(figsize=(10, 5))
for g_idx in [0, 1, 2]:
ax.plot(badpixels_map[mod][:, cell, g_idx], label=f"G{g_idx} Bad pixel map")
            ax.set_xticks(np.arange(0, 1281, 80))
            ax.set_xlabel("Stripes #")
            ax.set_ylabel("BadPixels")
ax.set_title(f"BadPixels map - Cell {cell} - Module {mod} ({pdu})")
ax.set_ylim([0, 5])
ax.legend()
plt.show()
step_timer.done_step(f"Creating bad pixels constant.")
```
%% Cell type:code id:c8777cfe tags:
``` python
if not local_output:
for cons, cname in zip([offset_map, noise_map], ["Offset", "Noise"]):
for mod in karabo_da:
pdu = da_to_pdu[mod]
display(Markdown(f"### {cname} for module {mod}:"))
for cell in [0, 1]:
fig, ax = plt.subplots(figsize=(10, 5))
for g_idx in [0, 1, 2]:
ax.plot(cons[mod][:, cell, g_idx], label=f"G{g_idx} {cname} map")
ax.set_xlabel("Stripes #")
ax.set_xlabel(cname)
ax.set_title(f"{cname} map - Cell {cell} - Module {mod} ({pdu})")
ax.legend()
plt.show()
```
%% Cell type:code id:1c4eddf7-7d6e-49f4-8cbb-12d2bc496a8f tags:
``` python
step_timer.start()
for mod in karabo_da:
db_mod = da_to_pdu[mod]
constants = {
"Offset": offset_map[mod],
"Noise": noise_map[mod],
"BadPixelsDark": badpixels_map[mod],
}
md = None
for key, const_data in constants.items():
const = getattr(Constants.Gotthard2, key)()
const.data = const_data
if db_output:
md = send_to_db(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
)
if local_output:
md = save_const_to_h5(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder,
)
print(f"Calibration constant {key} is stored locally at {out_folder}.\n")
print("Constants parameter conditions are:\n")
print(
f"• Bias voltage: {bias_voltage}\n"
f"• Exposure time: {exposure_time}\n"
f"• Exposure period: {exposure_period}\n"
f"• Acquisition rate: {acquisition_rate}\n"
f"• Single photon: {single_photon}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n"
)
step_timer.done_step("Injecting constants.")
```
%% Cell type:code id:98ca9486 tags:
``` python
# TODO: store old constants for comparison.
for mod in karabo_da:
mod_file = mod.replace("/", "-")
with open(f"{metadata_folder or out_folder}/module_metadata_{mod_file}.yml", "w") as fd:
yaml.safe_dump(
{
"module": mod,
"pdu": da_to_pdu[mod],
},
fd,
)
```
......
%% Cell type:markdown id:bed7bd15-21d9-4735-82c1-c27c1a5e3346 tags:
# Gotthard2 Offline Correction
Author: European XFEL Detector Group, Version: 1.0
Offline Correction for Gotthard2 Detector.
This notebook corrects both 25um and 50um GH2 detectors using the same correction steps:
- Convert the 12-bit raw data into 10-bit, subtract the offset, then multiply by the relative gain constant.
| Correction | constants | boolean to enable/disable |
|------------|-------------|-----------------------------|
| 12bit to 10bit | `LUTGotthard2` | |
| Offset | `OffsetGotthard2`|`offset_correction`|
| Relative gain | `RelativeGainGotthard2` + `BadPixelsFFGotthard2` |`gain_correction`|
Besides the corrected data, a mask is stored, derived from the bad pixels constants with the same parameter conditions and time:
- `BadPixelsDarkGotthard2`
- `BadPixelsFFGotthard2`, if relative gain correction is requested.
The correction is done per sequence file. If none of the selected sequence files has images to correct, the notebook fails.
The same happens if the needed dark calibration constants were not retrieved for all modules while `offset_correction` is True.
If one of the gain constants was not retrieved, `gain_correction` is switched to False and gain correction is disabled.
The `data` datasets stored in the RECEIVER source along with the corrected image (`adc`) and `mask` are:
- `gain`
- `bunchId`
- `memoryCell`
- `frameNumber`
- `timestamp`
- `trainId`
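As a minimal per-frame sketch of the arithmetic behind these steps (illustrative only; the actual implementation is the Cython routine `gotthard2algs.correct_train`, and the names below are assumptions): the constants have shape (1280, 2, 3) indexed as (strip, cell, gain), and a raw gain value of 3 selects gain stage 2.
``` python
import numpy as np

def correct_frame_sketch(data_10bit, gain_bits, cell, offset, rel_gain):
    # data_10bit, gain_bits: (1280,) values of one frame; cell: 0 or 1.
    # offset, rel_gain: (1280, 2, 3) constants.
    stage = np.where(gain_bits == 3, 2, gain_bits)  # raw gain 3 -> stage 2
    strips = np.arange(1280)
    corrected = data_10bit - offset[strips, cell, stage]   # offset subtraction
    corrected = corrected * rel_gain[strips, cell, stage]  # relative gain
    return corrected
```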
%% Cell type:code id:570322ed-f611-4fd1-b2ec-c12c13d55843 tags:
``` python
in_folder = "/gpfs/exfel/exp/DETLAB/202330/p900326/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 20 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per node if notebook executed through xfel-calibrate, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
output_source_template = "{karabo_id}/CORR/RECEIVER:daqOutput" # Correction data source template. filled with karabo_id and correction receiver
ctrl_source_template = "{karabo_id_control}/DET/{control_template}" # template for control source name (filled with karabo_id_control)
karabo_id = "DETLAB_25UM_GH2" # karabo prefix of Gotthard-II devices
karabo_da = [""] # data aggregators
input_source_affixes = [""] # The affix to format into the input source template to be able to load the correct module's data.
input_source_template = "{karabo_id}/DET/RECEIVER{input_source_affixes}:daqOutput" # data source template used to read INSTRUMENT keys.
control_template = "CONTROL" # control template used to read CONTROL keys.
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for calibration database.
cal_db_interface = "tcp://max-exfl-cal001:8016#8025" # the database interface to use.
cal_db_timeout = 180000 # timeout on caldb requests.
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameters affecting corrected data.
offset_correction = True # apply offset correction. This can be disabled to only apply LUT or apply LUT and gain correction for non-linear differential results.
gain_correction = True # apply gain correction.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameter conditions.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
reverse_second_module = -1 # Reverse 25um GH2 second module before interleaving. set to -1 to use value in raw file to reverse based on `CTRL/reverseSlaveReadOutMode`'s value.
sensor_type = "" # Specifies the GH2 sensor type: '50um', '25um', or "" to automatically decide from RUN data.
# Parameters for plotting
skip_plots = False # exit after writing corrected files
pulse_idx_preview = 3 # pulse index to preview. The following even/odd pulse index is used for preview. # TODO: update to pulseId preview.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id:6e9730d8-3908-41d7-abe2-d78e046d5de2 tags:
``` python
import warnings
from logging import warning
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
from extra_data import RunDirectory, H5File
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import CalCatError, GOTTHARD2_CalibrationData
from cal_tools.files import DataFile
from cal_tools.gotthard2 import gotthard2algs, gotthard2lib
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
calcat_creation_time,
exit_notebook,
map_seq_files,
write_constants_fragment,
)
from XFELDetAna.plotting.heatmap import heatmapPlot
warnings.filterwarnings('ignore')
%matplotlib inline
```
%% Cell type:code id:fc893eff tags:
``` python
# Input parameters validation
gotthard2lib.validate_sensor_type(sensor_type)
```
%% Cell type:code id:d7c02c48-4429-42ea-a42e-de45366d7fa3 tags:
``` python
in_folder = Path(in_folder)
run_folder = in_folder / f"r{run:04d}"
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
if not karabo_id_control:
karabo_id_control = karabo_id
ctrl_src = ctrl_source_template.format(
karabo_id_control=karabo_id_control,
control_template=control_template,
)
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id:f9a8d1eb-ce6a-4ed0-abf4-4a6029734672 tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id:892172d8 tags:
``` python
run_dc = RunDirectory(run_folder)
# Read slow data
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
if bias_voltage == -1:
bias_voltage = g2ctrl.get_bias_voltage()
if exposure_time == -1:
exposure_time = g2ctrl.get_exposure_time()
if exposure_period == -1:
exposure_period = g2ctrl.get_exposure_period()
if acquisition_rate == -1:
acquisition_rate = g2ctrl.get_acquisition_rate()
if single_photon == -1:
single_photon = g2ctrl.get_single_photon()
if not sensor_type:
sensor_type = g2ctrl.get_sensor_type()
# Check if the GH2 detector type is "50um" and validate that only one data aggregator is provided
if sensor_type == "50um" and len(karabo_da) > 1:
raise ValueError(
f"karabo_da has more than one aggregator {karabo_da}, "
"but the GH2 detector type is specified as '50um', which expects only one aggregator."
)
if reverse_second_module == -1 and sensor_type == "25um":
reverse_second_module = not g2ctrl.second_module_reversed()
if reverse_second_module:
warning(
"Second module is not reversed. "
"Reversing second module before correction.")
print("Bias Voltage:", bias_voltage)
print("Exposure Time:", exposure_time)
print("Exposure Period:", exposure_period)
print("Acquisition Rate:", acquisition_rate)
print("Single Photon:", single_photon)
print(f"Processing {gh2_detector} Gotthard2.")
print(f"Processing {sensor_type} Gotthard2.")
```
%% Cell type:code id:21a8953a-8c76-475e-8f4f-b201cc25c159 tags:
``` python
# GH2 calibration data object.
g2_cal = GOTTHARD2_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
exposure_time=exposure_time,
exposure_period=exposure_period,
acquisition_rate=acquisition_rate,
single_photon=single_photon,
event_at=creation_time,
client=rest_cfg.calibration_client(),
)
da_to_pdu = None
# Keep as long as it is essential to correct
# RAW data (FXE p003225) before the data mapping was added to CALCAT.
try: # in case local constants are used with old RAW data. This can be removed in the future.
da_to_pdu = g2_cal.mod_to_pdu
except CalCatError as e:
print(e)
db_modules = [None] * len(karabo_da)
if da_to_pdu:
if karabo_da == [""]:
karabo_da = sorted(da_to_pdu.keys())
else:
# Exclude non selected DA from processing.
karabo_da = [da for da in karabo_da if da in da_to_pdu]
db_modules = [da_to_pdu[da] for da in karabo_da]
print(f"Process modules: {db_modules} for run {run}")
da_to_source = gotthard2lib.map_da_to_source(
run_dc,
karabo_da,
input_source_template,
karabo_id,
input_source_affixes,
)
data_sources = list(da_to_source.values())
```
%% Cell type:code id:2551b923 tags:
``` python
# Check the available trains to correct.
total_trains = len(
RunDirectory(run_folder).select(data_sources, require_all=True).train_ids)
if total_trains:
print(f"Correcting {total_trains}.")
else:
raise ValueError(f"No trains to correct for run {run}.")
```
%% Cell type:markdown id:8c852392-bb19-4c40-b2ce-3b787538a92d tags:
### Retrieving calibration constants
%% Cell type:code id:5717d722 tags:
``` python
# Used for old FXE (p003225) runs before adding Gotthard2 to CALCAT
const_data = dict()
constant_names = ["LUTGotthard2", "OffsetGotthard2", "BadPixelsDarkGotthard2"]
if gain_correction:
constant_names += ["RelativeGainGotthard2", "BadPixelsFFGotthard2"]
g2_metadata = g2_cal.metadata(calibrations=constant_names)
# Display retrieved calibration constants timestamps
g2_cal.display_markdown_retrieved_constants(metadata=g2_metadata)
# Validate the constants availability and raise/warn correspondingly.
for mod, calibrations in g2_metadata.items():
dark_constants = {"LUTGotthard2"}
if offset_correction:
dark_constants |= {"OffsetGotthard2", "BadPixelsDarkGotthard2"}
missing_dark_constants = dark_constants - set(calibrations)
if missing_dark_constants:
karabo_da.remove(mod)
warning(f"Dark constants {missing_dark_constants} are not available to correct {mod}.") # noqa
missing_gain_constants = {
"BadPixelsFFGotthard2", "RelativeGainGotthard2"} - set(calibrations)
if gain_correction and missing_gain_constants:
warning(f"Gain constants {missing_gain_constants} are not retrieved for mod {mod}.")
if not karabo_da:
raise ValueError("Dark constants are not available for all modules.")
```
%% Cell type:code id:ac1cdec5 tags:
``` python
# Record constant details in YAML metadata.
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=g2_metadata,
caldb_root=g2_cal.caldb_root)
# Load constants data for all constants.
const_data = g2_cal.ndarray_map(metadata=g2_metadata)
# Prepare constant arrays.
for mod in karabo_da:
# Create the mask array.
bpix = const_data[mod].get("BadPixelsDarkGotthard2")
if bpix is None:
bpix = np.zeros((1280, 2, 3), dtype=np.uint32)
if const_data[mod].get("BadPixelsFFGotthard2") is not None:
bpix |= const_data[mod]["BadPixelsFFGotthard2"]
const_data[mod]["Mask"] = bpix
# Prepare empty arrays for missing constants.
if const_data[mod].get("OffsetGotthard2") is None:
const_data[mod]["OffsetGotthard2"] = np.zeros(
(1280, 2, 3), dtype=np.float32)
if const_data[mod].get("RelativeGainGotthard2") is None:
const_data[mod]["RelativeGainGotthard2"] = np.ones(
(1280, 2, 3), dtype=np.float32)
const_data[mod]["RelativeGainGotthard2"] = const_data[mod]["RelativeGainGotthard2"].astype( # noqa
np.float32, copy=False) # Old gain constants are not float32.
```
%% Cell type:code id:2c7dd0bb tags:
``` python
file_da = list({kda.split('/')[0] for kda in karabo_da})
mapped_files, total_files = map_seq_files(
run_folder,
file_da,
sequences,
)
# This notebook doesn't account for processing more
# than one file data aggregator.
seq_files = mapped_files[file_da[0]]
if not len(seq_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
print(f"Processing a total of {total_files} sequence files")
```
%% Cell type:code id:23fcf7f4-351a-4df7-8829-d8497d94fecc tags:
``` python
context = psh.ProcessContext(num_workers=23)
```
%% Cell type:code id:daecd662-26d2-4cb8-aa70-383a579cf9f9 tags:
``` python
def correct_train(wid, index, d):
g = gain[index]
gotthard2algs.convert_to_10bit(d, const_data[mod]["LUTGotthard2"], data_corr[index, ...])
gotthard2algs.correct_train(
data_corr[index, ...],
mask[index, ...],
g,
const_data[mod]["OffsetGotthard2"].astype(np.float32), # PSI map is in f8
const_data[mod]["RelativeGainGotthard2"],
const_data[mod]["Mask"],
apply_offset=offset_correction,
apply_gain=gain_correction,
)
```
%% Cell type:code id:f88c1aa6-a735-4b72-adce-b30162f5daea tags:
``` python
corr_data_source = output_source_template.format(karabo_id=karabo_id)
empty_seq = 0
for raw_file in seq_files:
out_file = out_folder / raw_file.name.replace("RAW", "CORR")
# Select module INSTRUMENT sources and deselect empty trains.
dc = H5File(raw_file).select(data_sources, require_all=True)
n_trains = len(dc.train_ids)
if n_trains == 0:
empty_seq += 1
warning(f"Skipping correction. No trains to correct for this sequence file: {raw_file}.")
continue
else:
print(f"Correcting {n_trains} for {raw_file}.")
# Initialize GH2 data and gain arrays to store in corrected files.
if gh2_detector == "25um":
if sensor_type == "25um":
dshape_stored = (dc[data_sources[0], "data.adc"].shape[:2] + (1280 * 2,))
data_stored = np.zeros(dshape_stored, dtype=np.float32)
gain_stored = np.zeros(dshape_stored, dtype=np.uint8)
mask_stored = np.zeros(dshape_stored, dtype=np.uint32)
else:
data_stored, gain_stored, mask_stored = None, None, None
for i, mod in enumerate(karabo_da):
src = da_to_source[mod]
step_timer.start()
print(f"Correcting {src} for {raw_file}")
data = dc[src, "data.adc"].ndarray()
gain = dc[src, "data.gain"].ndarray()
# This is a safeguard, addressing the rare scenario of
# configuring the second module to not reverse the readout.
if reverse_second_module and i == 1:
data = np.flip(data, axis=-1)
gain = np.flip(gain, axis=-1)
step_timer.done_step("Preparing raw data")
dshape = data.shape
step_timer.start()
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask = context.alloc(shape=dshape, dtype=np.uint32)
context.map(correct_train, data)
step_timer.done_step(f"Correcting one receiver in one sequence file")
step_timer.start()
# Provided PSI gain map has 0 values. Set inf values to nan.
        # TODO: This can maybe be removed after creating XFEL gain maps?
data_corr[np.isinf(data_corr)] = np.nan
# Create CORR files and add corrected data sections.
image_counts = dc[src, "data.adc"].data_counts(labelled=False)
if gh2_detector == "25um":
if sensor_type == "25um":
data_stored[..., i::2] = data_corr
gain_stored[..., i::2] = gain
mask_stored[..., i::2] = mask
else: # "50um"
data_stored, gain_stored, mask_stored = data_corr, gain, mask
seq_file = dc.files[0] # FileAccess
with DataFile(out_file, "w") as ofile:
# Create INDEX datasets.
ofile.create_index(dc.train_ids, from_file=seq_file)
ofile.create_metadata(
like=dc,
sequence=seq_file.sequence,
instrument_channels=(f"{corr_data_source}/data",)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(corr_data_source)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected trainId in the corrected file.
outp_source.create_key(
f"data.trainId", data=dc.train_ids,
chunks=min(50, len(dc.train_ids))
)
# Create datasets with the available corrected data
for field_name, field_data in {
"adc": data_stored,
"gain": gain_stored,
}.items():
outp_source.create_key(
f"data.{field_name}", data=field_data,
chunks=((chunks_data,) + data_corr.shape[1:])
)
# For GH2 25um, the data of the second receiver is
# stored in the corrected file.
for field in ["bunchId", "memoryCell", "frameNumber", "timestamp"]:
outp_source.create_key(
f"data.{field}", data=dc[src, f"data.{field}"].ndarray(),
chunks=(chunks_data, data_corr.shape[1])
)
outp_source.create_compressed_key(f"data.mask", data=mask_stored)
step_timer.done_step("Storing data")
```
%% Cell type:code id:94b8e4d2-9f8c-4c23-a509-39238dd8435c tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id:0ccc7f7e-2a3f-4ac0-b854-7d505410d2fd tags:
``` python
if skip_plots:
exit_notebook('Skipping plots as configured.')
if empty_seq == len(seq_files):
exit_notebook(f"All sequence files contain no data for correction.")
```
%% Cell type:code id:ff203f77-3811-46f3-bf7d-226d2dcab13f tags:
``` python
mod_dcs = {}
first_seq_raw = seq_files[0]
first_seq_corr = out_folder / first_seq_raw.name.replace("RAW", "CORR")
mod_dcs[corr_data_source] = {}
with H5File(first_seq_corr) as out_dc:
tid, mod_dcs[corr_data_source]["train_corr_data"] = next(
out_dc[corr_data_source, "data.adc"].trains()
)
mask = out_dc[corr_data_source, "data.mask"].train_from_id(tid)[1]
if gh2_detector == "25um":
if sensor_type == "25um":
mod_dcs[corr_data_source]["train_raw_data"] = np.zeros((data_corr.shape[1], 1280 * 2), dtype=np.float32)
mod_dcs[corr_data_source]["train_raw_gain"] = np.zeros((data_corr.shape[1], 1280 * 2), dtype=np.uint8)
for i, src in enumerate(data_sources):
with H5File(first_seq_raw) as in_dc:
train_dict = in_dc.train_from_id(tid)[1][src]
if gh2_detector == "25um":
if sensor_type == "25um":
if reverse_second_module and i == 1:
data = np.flip(train_dict["data.adc"], axis=-1)
gain = np.flip(train_dict["data.gain"], axis=-1)
else:
data = train_dict["data.adc"]
gain = train_dict["data.gain"]
mod_dcs[corr_data_source]["train_raw_data"][..., i::2] = data
mod_dcs[corr_data_source]["train_raw_gain"][..., i::2] = gain
else:
mod_dcs[corr_data_source]["train_raw_data"] = train_dict["data.adc"]
mod_dcs[corr_data_source]["train_raw_gain"] = train_dict["data.gain"]
```
%% Cell type:code id:bcb2c472 tags:
``` python
def plot_mean_data(data, title_format, y_label, fig_size=(10, 6)):
_, ax = plt.subplots(figsize=fig_size)
mean_data = np.mean(data, axis=0)
ax.plot(mean_data)
ax.set_title(title_format, fontsize=12)
ax.set_xlabel("Strip #", size=12)
ax.set_ylabel(y_label, size=12)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.show()
# Display markdown headers
display(Markdown("### Mean RAW and CORRECTED across pulses for one train:"))
display(Markdown(f"Train: {tid}"))
# Set title format based on sensor_type
if sensor_type == "50um":
title_format = f"{{}} data for {karabo_da} ({db_modules})"
else:
title_format = f"Interleaved {{}} data for {karabo_da} ({db_modules})"
step_timer.start()
# Plot RAW data
raw_data = mod_dcs[corr_data_source]["train_raw_data"]
plot_mean_data(raw_data, title_format.format("RAW"), "12-bit ADC output")
# Plot CORRECTED data
corr_data = mod_dcs[corr_data_source]["train_corr_data"]
plot_mean_data(corr_data, title_format.format("CORRECTED"), "10-bit KeV. output")
# Plot CORRECTED MASKED data
corr_data_masked = corr_data.copy()
corr_data_masked[mask!=0] = np.nan
plot_mean_data(corr_data_masked, title_format.format("CORRECTED MASKED"), "10-bit KeV. output")
step_timer.done_step("Plotting mean data")
```
%% Cell type:code id:58a6a276 tags:
``` python
display(Markdown(f"### RAW and CORRECTED strips across pulses for train {tid}"))
step_timer.start()
for plt_data, dname in zip(
[raw_data, corr_data, corr_data_masked], ["RAW", "CORRECTED", "CORRECTED MASKED"]
):
fig, ax = plt.subplots(figsize=(15, 10))
plt.rcParams.update({"font.size": 14})
vmin, vmax = np.nanpercentile(plt_data, [1, 99])
cb_aspect = max(2, (vmax-vmin) / 25)
ax.tick_params(axis='both', which='major', labelsize=14)
heatmapPlot(
plt_data,
y_label="Pulses",
x_label="Strips",
title=title_format.format(dname),
use_axis=ax,
cb_aspect=cb_aspect,
vmin=vmin, vmax=vmax,
cmap='viridis',
)
step_timer.done_step("Plotting RAW and CORRECTED data for one train")
```
%% Cell type:code id:cd8f5e08-fcee-4bff-ba63-6452b3d892a2 tags:
``` python
# Validate given "pulse_idx_preview"
if pulse_idx_preview + 1 > data.shape[1]:
print(
f"WARNING: selected pulse_idx_preview {pulse_idx_preview} is not available in data."
" Previewing 1st pulse."
)
pulse_idx_preview = 1
if data.shape[1] == 1:
odd_pulse = 1
even_pulse = None
else:
odd_pulse = pulse_idx_preview if pulse_idx_preview % 2 else pulse_idx_preview + 1
even_pulse = (
pulse_idx_preview if not (pulse_idx_preview % 2) else pulse_idx_preview + 1
)
```
%% Cell type:code id:e2ecafe1 tags:
``` python
def plot_pulses(data, odd_pulse, even_pulse, title, y_label, fig_size=(10, 6)):
fig, ax = plt.subplots(figsize=fig_size)
ax.plot(data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
    if even_pulse is not None:
ax.plot(data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(title, fontsize=12)
ax.set_xlabel("Strip #", size=12)
ax.set_ylabel(y_label, size=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
ax.legend()
plt.show()
# Display markdown headers
display(Markdown("### RAW and CORRECTED even/odd pulses for one train:"))
display(Markdown(f"Train: {tid}"))
# Plot RAW data
plot_pulses(
raw_data, odd_pulse, even_pulse, title_format.format("RAW"), "12-bit ADC RAW")
# Plot CORRECTED data
plot_pulses(
corr_data, odd_pulse, even_pulse,
title_format.format("CORRECTED"), "10-bit KeV CORRECTED")
# Plot CORRECTED MASKED data
plot_pulses(
corr_data_masked, odd_pulse, even_pulse,
title_format.format("CORRECTED MASKED"), "10-bit KeV CORRECTED MASKED")
step_timer.done_step("Plotting RAW and CORRECTED odd/even pulses.")
```
......
%% Cell type:markdown id: tags:
# Gotthard2 Dark Summary
Author: European XFEL Detector Department, Version: 1.0
Summary of the processed dark constants and a comparison with previously injected constants with the same conditions.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/DETLAB/202330/p900326/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate.
run_high = 20 # run number for G0 dark run, required
run_med = 21 # run number for G1 dark run, required
run_low = 22 # run number for G2 dark run, required
# Parameters used to access raw data.
karabo_id = "DETLAB_25UM_GH2" # detector identifier.
karabo_da = ["DA01/1", "DA01/2"] # list of data aggregators, which corresponds to different JF modules. This is only needed for the detectors of one module.
control_template = "CONTROL" # control template used to read CONTROL keys.
ctrl_source_template = '{karabo_id}/DET/{control_template}' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
sensor_type = "" # Specifies the GH2 sensor type: '50um', '25um', or "" to automatically decide from RUN data.
# Parameters to be used for injecting dark calibration constants.
local_output = True # Boolean indicating that local constants were stored in the out_folder
# Skip the whole notebook if local_output is false in the preceding notebooks.
if not local_output:
from cal_tools.tools import exit_notebook
exit_notebook('No local constants saved. Skipping summary plots')
```
%% Cell type:code id: tags:
``` python
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import yaml
from extra_data import RunDirectory
from IPython.display import Markdown, display
from cal_tools.gotthard2 import gotthard2lib
matplotlib.use("agg")
%matplotlib inline
from cal_tools.tools import CalibrationMetadata
```
%% Cell type:code id: tags:
``` python
# Input parameters validation
gotthard2lib.validate_sensor_type(sensor_type)
```
%% Cell type:code id: tags:
``` python
out_folder = Path(out_folder)
metadata = CalibrationMetadata(metadata_folder or out_folder)
```
%% Cell type:code id: tags:
``` python
if not karabo_id_control:
karabo_id_control = karabo_id
g2ctrl = gotthard2lib.Gotthard2Ctrl(
run_dc=RunDirectory(Path(in_folder) / f"r{run_high:04d}"),
ctrl_src=ctrl_source_template.format(
karabo_id_control=karabo_id_control,
control_template=control_template
))
if not sensor_type:
sensor_type = g2ctrl.get_sensor_type()
print(f"Processing {sensor_type} Gotthard2.")
```
%% Cell type:code id: tags:
``` python
mod_mapping = dict()
for fn in Path(metadata_folder or out_folder).glob("module_metadata_*.yml"):
with fn.open("r") as fd:
fdict = yaml.safe_load(fd)
mod_mapping[fdict["module"].replace("-", "/")] = fdict["pdu"]
mod_mapping = dict(sorted(mod_mapping.items(), key=lambda item: item[0]))
```
%% Cell type:code id: tags:
``` python
plt_map = dict()
dark_constants = ["Offset", "Noise", "BadPixelsDark"]
if gh2_detector == "25um":
if sensor_type == "25um":
for cname in dark_constants:
plt_map[cname] = np.zeros(
(1280 * 2, 2, 3),
dtype=np.uint32 if cname == "BadPixelsDark" else np.float32
)
```
%% Cell type:code id: tags:
``` python
for cname in dark_constants:
for i, (mod, pdu) in enumerate(mod_mapping.items()):
module = mod.replace("-", "/")
with h5py.File(out_folder / f"const_{cname}_{pdu}.h5", 'r') as f:
if gh2_detector == "25um":
if sensor_type == "25um":
plt_map[cname][i::2] = f["data"][()]
else:
plt_map[cname] = f["data"][()]
```
%% Cell type:code id: tags:
``` python
if gh2_detector == "50um":
if sensor_type == "50um":
title = (
f"{{}} data for "
f"{[f'{mod}({pdu})' for mod, pdu in mod_mapping.items()]}"
)
else:
title = (
f"Interleaved {{}} data for "
f"{[f'{mod}({pdu})' for mod, pdu in mod_mapping.items()]}"
)
for cname in dark_constants:
display(Markdown(f"### {cname}"))
for cell in [0, 1]:
fig, ax = plt.subplots(figsize=(10, 5))
for g_idx in [0, 1, 2]:
ax.plot(plt_map[cname][:, cell, g_idx], label=f"G{g_idx} {cname} map")
ax.set_xticks(
np.arange(0, plt_map[cname].shape[0]+1, plt_map[cname].shape[0]//16)
)
ax.set_xlabel("Stripes #", fontsize=15)
ax.set_ylabel("ADU", fontsize=15)
ax.set_title(title.format(f"{cname} map - Cell {cell}"), fontsize=15)
if cname == "BadPixelsDark":
ax.set_ylim([0, 5])
ax.legend()
plt.show()
```
......
%% Cell type:markdown id: tags:
# Histogram of single photon spectra
Author: European XFEL Detector Group, Version: 1.0
Creates histograms from flat fields and stores them in HDF5 files, then performs fitting based on the selected `fit_func`.
Histograms are built on a pixel-by-pixel, memory-cell-by-memory-cell basis to save memory and space; the histogram range should be optimized around the desired peak.
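Conceptually (a minimal numpy sketch, not the `jungfrau_ff` implementation; names are illustrative), a per-pixel histogram over frames uses the fixed bin grid defined by `h_bins_s` and `h_range`:
``` python
import numpy as np

h_range = (-50., 450.)  # histogram range in ADC units
h_bins_s = 2            # bin width in ADC units
edges = np.arange(h_range[0], h_range[1] + h_bins_s, h_bins_s)

def per_pixel_histograms(frames):
    # frames: (n_frames, nrow, ncol) offset-corrected ADC values.
    # Returns (n_bins, nrow, ncol): one histogram per pixel.
    _, nrow, ncol = frames.shape
    hists = np.zeros((len(edges) - 1, nrow, ncol), dtype=np.uint32)
    for i in range(nrow):
        for j in range(ncol):
            hists[:, i, j] = np.histogram(frames[:, i, j], bins=edges)[0]
    return hists
```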
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202330/p900343/raw' # RAW data path, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove/JF4M_SPB_gain/r66/no_offset' # Output path for gain data, required
metadata_folder = '.' # Directory containing calibration_metadata.yml when run by xfel-calibrate
runs = [66] # can be a list of runs
sensor_size = [512, 1024] # size of the array in the 'row' and 'col' dimensions
chunked_trains = 1000 # Number of trains per chunk.
gains = [0, 1, 2] # gain bit values
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = [""] # Detector's data aggregators. Leave empty to derive from CALCAT.
creation_time = '' # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameter conditions
bias_voltage = -1 # bias voltage - Set to -1 to derive from CONTROL file.
integration_time = -1. # integration time - Set to -1 to derive from CONTROL file.
gain_mode = -1 # gain mode (dynamic or fixed gain) - Set to -1 to derive from CONTROL file.
gain_setting = -1 # gain setting parameter conditions (High or low CDS) - Set to -1 to derive from CONTROL file.
memory_cells = -1 # number of memory cells - Set to -1 to derive from CONTROL file.
sc_start = -1 # storage cell start value - should be derived from CONTROL file.
h_bins_s = 2 # bin width in ADC units of the histogram
h_range = [-50, 450.] # histogram range in ADC units
block_size = [256, 64] # dimension of the chunks in 'row' and 'col'
control_src_template = '{}/DET/CONTROL'
extra_dims = ['cell', 'row', 'col'] # labels for the DataArray dims after the first
fit_func = 'CHARGE_SHARING' # function used to fit the single photon peak. The 3 available options are: CHARGE_SHARING, CHARGE_SHARING_2, and GAUSS
fit_h_range = [150, 350.] # range of the histogram in x-axis units used in fitting
# parameters for the peak finder
n_sigma = 5. # n of sigma above pedestal threshold
ratio = 0.99 # ratio of the next peak amplitude in the peak_finder
rebin = 1
def find_das(in_folder, runs, karabo_da):
run = runs[0]
if (karabo_da is not None) and karabo_da != [""]:
return karabo_da
from pathlib import Path
import re
karabo_da = set()
for file in Path(in_folder, f'r{run:04d}').iterdir():
da = re.search(r'JNGFR\d{2}', file.name)
if da:
karabo_da.add(da.group())
return sorted(karabo_da)
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import time
from functools import partial
from logging import warning
from pathlib import Path
import numpy as np
import pasha as psh
from extra_data import RunDirectory
from h5py import File as h5file
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
from cal_tools.jungfrau import jungfrau_ff
from cal_tools.jungfrau.jungfraulib import JungfrauCtrl
from cal_tools.step_timing import StepTimer
from cal_tools.tools import calcat_creation_time
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True, parents=True)
print(f'Creating directory {out_folder}')
```
%% Cell type:code id: tags:
``` python
begin_stuff = time.localtime()
control_src = control_src_template.format(karabo_id, karabo_da[0])
first_run_folder = in_folder / f'r{runs[0]:04d}'
ctrl_data = JungfrauCtrl(RunDirectory(first_run_folder), control_src)
# Run's creation time:
creation_time = calcat_creation_time(in_folder, runs[0], creation_time)
print(f"Creation time: {creation_time}")
print("Chunked trains: ", chunked_trains)
```
%% Cell type:code id: tags:
``` python
def read_detector_conditions(
run_dc,
run_number,
bias_voltage,
memory_cells,
integration_time,
gain_mode,
gain_setting,
):
"""Get parameter conditions for a run.
Args:
run_dc(extra_data.DataCollection):
run_number(int):
integration_time(float)
bias_voltage(float):
gain_mode(int):
gain_setting(int):
Returns:
float, int, float, int, int, int
"""
    ## Retrieve conditions from control data where requested.
    ctrl_data = JungfrauCtrl(run_dc, control_src)
    sc_start = -1  # Storage cell start; read from control data together with the memory cells.
    if memory_cells < 0:
        memory_cells, sc_start = ctrl_data.get_memory_cells()
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
if bias_voltage < 0:
bias_voltage = ctrl_data.get_bias_voltage()
if gain_setting < 0:
gain_setting = ctrl_data.get_gain_setting()
if gain_mode < 0:
gain_mode = ctrl_data.get_gain_mode()
if memory_cells == 16 and gain_mode == 0:
warning(
"Data is in burst mode and dynamic gain. Forcing gain mode to fixed gain.")
gain_mode = 1
return (
bias_voltage,
memory_cells,
sc_start,
integration_time,
gain_mode,
gain_setting,
run_number,
)
condition_lists = {
"integration_time": [],
"memory_cells": [],
"sc_start": [],
"bias_voltage": [],
"gain_setting": [],
"gain_mode": [],
"runs": [],
}
for run in runs:
voltage, cells, sc_start, integration, mode, setting, run_number = read_detector_conditions( # noqa
RunDirectory(in_folder / f"r{run:04d}"),
run,
bias_voltage=bias_voltage,
memory_cells=memory_cells,
integration_time=integration_time,
gain_mode=gain_mode,
gain_setting=gain_setting)
condition_lists["bias_voltage"].append(voltage)
condition_lists["memory_cells"].append(cells)
condition_lists["integration_time"].append(integration)
condition_lists["gain_mode"].append(mode)
condition_lists["gain_setting"].append(setting)
condition_lists["sc_start"].append(sc_start)
# Validate run conditions
runs_results = condition_lists["runs"]
for c, lst in condition_lists.items():
if c == "runs":
continue
if not all(val == lst[0] for val in lst):
warning(
f"{c} is not the same for all runs: {lst} "
f"for runs {runs_results}, respectively"
)
memory_cells = condition_lists["memory_cells"][0]
sc_start = condition_lists["sc_start"][0]
print(f"Memory cells: {memory_cells:d}")
bias_voltage = condition_lists["bias_voltage"][0]
print(f"Bias Voltage: {bias_voltage:3.1f} V")
integration_time = condition_lists["integration_time"][0]
print(f"Exposure Time: {integration_time:1.2f} us")
gain_mode = condition_lists["gain_mode"][0]
print(f"Gain mode: {gain_mode}")
gain_setting = condition_lists["gain_setting"][0]
print(f"Gain setting: {gain_setting}")
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants
jf_cal = JUNGFRAU_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
event_at=creation_time,
modules=karabo_da,
memory_cells=memory_cells,
integration_time=integration_time,
gain_setting=gain_setting,
# Retrieve fixed gain constants for burst mode.
# Dark MDL pushes fixed gain constants for burst mode.
gain_mode=gain_mode,
client=rest_cfg.calibration_client(),
)
jf_metadata = jf_cal.metadata(
calibrations=["Offset10Hz", "Noise10Hz"])
# TODO: display CCV timestamp
const_data = jf_cal.ndarray_map(metadata=jf_metadata)
jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)
for mod in karabo_da:
constants_data = const_data[mod]
if "Offset10Hz" in constants_data:
# From (512, 1024, 1, 3) to (3, 1, 512, 1024)
constants_data["Offset10Hz"] = np.transpose(
np.swapaxes(constants_data["Offset10Hz"], 0, 1)).astype(np.float32)
else:
raise ValueError(f"Offset constant is not found for {mod}.")
if "Noise10Hz" in constants_data:
# From (512, 1024, 1, 3) to (3, 1, 512, 1024)
constants_data["Noise10Hz"] = np.transpose(
np.swapaxes(constants_data["Noise10Hz"], 0, 1)).astype(np.float32)
else:
constants_data["Noise10Hz"] = None
warning(f"Noise constant is not found for {mod}")
```
%% Cell type:markdown id: tags:
## Filling the histogram
* first it retrieves the DB offsets for a given run
* on a chunk-by-chunk basis it performs offset subtraction and fills the histogram
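The per-pixel spectra are accumulated by `cal_tools.jungfrau.jungfrau_ff.fill_histogram`. A minimal sketch of what it conceptually does (not the actual implementation), assuming chunks shaped `(trains, cells, rows, cols)` and `h_bins` holding the bin edges:
``` python
import numpy as np
def fill_histogram_sketch(h_bins, chunk):
    """Accumulate a per-pixel spectrum from one chunk of corrected data."""
    n_bins = len(h_bins) - 1
    _, cells, rows, cols = chunk.shape
    histo = np.zeros((n_bins, cells, rows, cols), dtype=np.int32)
    edges = None
    for c in range(cells):
        for r in range(rows):
            for k in range(cols):
                counts, edges = np.histogram(chunk[:, c, r, k], bins=h_bins)
                histo[:, c, r, k] += counts
    return histo, edges
```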
%% Cell type:markdown id: tags:
### Creating empty histogram
%% Cell type:code id: tags:
``` python
h_n_bins = int((h_range[1] - h_range[0])/h_bins_s)
# linspace returns h_n_bins edges, i.e. h_n_bins - 1 bins of roughly h_bins_s width.
h_bins = np.linspace(h_range[0], h_range[1], h_n_bins)
h_spectra = dict()
edges = dict()
for da in karabo_da:
h_spectra[da] = np.zeros(
(h_n_bins-1, memory_cells, sensor_size[0], sensor_size[1]),
dtype=np.int32,
)
edges[da] = None
```
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, d):
"""Offset correct per train."""
g = gain[index].values
# Jungfrau gains 0[00], 1[01], 3[11]
# Change low gain to 2 for indexing purposes.
g[g==3] = 2
    # Select memory cells
    if memory_cells > 1:
        # Even though the memory cell pattern is usually the same across all
        # trains of one acquisition, it is read per train to account for
        # exceptions that can happen.
        m = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # The corresponding image is corrected with the constant of cell 0 to
        # avoid values of 0. This relies on not storing the modified memory
        # cells in the corrected data.
        m[m==255] = 0
        offset_map_cell = offset_map[:, m, ...]  # [gain, cell, x, y]
    else:
        offset_map_cell = offset_map[:, 0, ...]
    # Offset correction
    offset = np.choose(g, offset_map_cell)
    d -= offset
    if memory_cells > 1:
        # Sort corrected data based on memory cell order.
        sort_indices = np.argsort(m)
        data_corr[index, ...] = d[sort_indices, ...]
    else:
        data_corr[index, ...] = d
```
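%% Cell type:markdown id: tags:
For reference, `np.choose(g, offset_map_cell)` above selects, for every pixel, the offset of that pixel's current gain stage. A self-contained toy illustration of this indexing:
``` python
import numpy as np
# Three hypothetical 2x2 offset maps, one per gain stage: [gain, row, col].
offset_maps = np.arange(12, dtype=np.float32).reshape(3, 2, 2)
g = np.array([[0, 1], [2, 0]])  # per-pixel gain stage index
picked = np.choose(g, offset_maps)
# picked[r, c] == offset_maps[g[r, c], r, c]
assert picked[0, 1] == offset_maps[1, 0, 1]
```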
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
for run in runs:
for da in karabo_da:
det_src = f'{karabo_id}/DET/{da}:daqOutput'
offset_map = const_data[da]["Offset10Hz"]
run_folder = in_folder / f'r{run:04d}'
        ## Offset subtraction & Histogram filling
        ### performs offset subtraction and fills the histogram
        ### looping over chunks of trains because single photon runs can be very large
for dc_chunk in RunDirectory(
run_folder, include=f"*{da}*"
).split_trains(trains_per_part=chunked_trains):
trains = dc_chunk.get_array(det_src, 'data.trainId')
memcells = dc_chunk.get_array(
det_src,
'data.memoryCell',
extra_dims=extra_dims[0:1]).astype(np.int16)
adc = dc_chunk.get_array(det_src, 'data.adc', extra_dims=extra_dims).astype(np.float32)
gain = dc_chunk.get_array(det_src, 'data.gain', extra_dims=extra_dims)
# gain = gain.where(gain < 2, other = 2).astype(np.int16)
step_timer.start()
# Allocate shared arrays for corrected data. Used in `correct_train()`
data_corr = context.alloc(shape=adc.shape, dtype=np.float32)
context.map(correct_train, adc)
step_timer.done_step("correct_train")
step_timer.start()
chunks = jungfrau_ff.chunk_Multi([data_corr,], block_size)
with multiprocessing.Pool() as pool:
partial_fill = partial(jungfrau_ff.fill_histogram, h_bins)
r_maps = pool.map(partial_fill, chunks)
for i, r in enumerate(r_maps):
h_chk, e_chk = r
if edges[da] is None:
edges[da] = np.array(e_chk)
n_blocks_col = int(h_spectra[da].shape[-1]/block_size[1])
irow = int(np.floor(i/n_blocks_col)) * block_size[0]
icol = i%n_blocks_col * block_size[1]
h_spectra[da][..., irow:irow+block_size[0], icol:icol+block_size[1]] += h_chk
step_timer.done_step("Histogram created")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:markdown id: tags:
### Saving histogram
%% Cell type:code id: tags:
``` python
for da in karabo_da:
fout_h_path = f'{out_folder}/R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{da}_Histo.h5'
hists = h_spectra[da]
with h5file(fout_h_path, 'w') as fout_h:
print(f"Saving histograms at {fout_h_path}.")
dset_h = fout_h.create_dataset("histos", data=hists)
dset_e = fout_h.create_dataset("edges", data=edges[da])
dset_noi = fout_h.create_dataset(
"noise_map",
data=const_data[da]["Noise10Hz"])
fout_h.attrs["memory_cells"] = memory_cells
fout_h.attrs["gain_setting"] = gain_setting
fout_h.attrs["gain_mode"] = gain_mode
fout_h.attrs["integration_time"] = integration_time
fout_h.attrs["bias_voltage"] = bias_voltage
fout_h.attrs["creation_time"] = str(creation_time)
```
%% Cell type:markdown id: tags:
## Fitting histograms
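The histograms are fitted in blocks of `block_size` pixels, and the per-chunk fit results are re-assembled into full-size maps from the chunk index. A toy check of that index arithmetic, assuming `chunk_Multi` emits blocks in row-major order (which the reassembly loop below relies on):
``` python
block_rows, block_cols = 256, 64       # block_size
map_cols = 1024                        # sensor_size[1]
n_blocks_col = map_cols // block_cols  # 16 blocks per block-row
def chunk_origin(i):
    """Origin (row, col) of chunk i in the full-size map."""
    return (i // n_blocks_col) * block_rows, (i % n_blocks_col) * block_cols
assert chunk_origin(0) == (0, 0)
assert chunk_origin(15) == (0, 960)   # last block of the first block-row
assert chunk_origin(16) == (256, 0)   # wraps to the second block-row
```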
%% Cell type:code id: tags:
``` python
fout_temp = f"R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{{}}_{fit_func}_Fit.h5"
for da in karabo_da:
_edges = np.array(edges[da])
x = (_edges[1:] + _edges[:-1]) / 2.0
x, _h_spectra = jungfrau_ff.set_histo_range(x, h_spectra[da], fit_h_range)
print(f'adjusting histogram range: {x[0]} - {x[-1]}')
print(f'Histo shape updated from {h_spectra[da].shape} to {_h_spectra.shape}')
print(f'x-axis: {x.shape}')
chunks = jungfrau_ff.chunk_Multi([_h_spectra], block_size)
partial_fit = partial(
jungfrau_ff.fit_histogram,
x,
fit_func,
n_sigma,
rebin,
ratio,
const_data[da]["Noise10Hz"],
)
print("starting spectra fit")
step_timer.start()
with multiprocessing.Pool() as pool:
r_maps = pool.map(partial_fit, chunks)
step_timer.done_step("r_maps calculation")
step_timer.start()
g0_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
sigma_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
chi2ndf_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
alpha_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
for i, r in enumerate(r_maps):
g0_chk, sigma_chk, chi2ndf_chk, alpha_chk = r
n_blocks_col = int(g0_map.shape[-1] / block_size[1])
irow = int(np.floor(i / n_blocks_col)) * block_size[0]
icol = i % n_blocks_col * block_size[1]
g0_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = g0_chk
sigma_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = sigma_chk
chi2ndf_map[
..., irow : irow + block_size[0], icol : icol + block_size[1]
] = chi2ndf_chk
alpha_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = alpha_chk
step_timer.done_step("Finished fitting")
fout_path = f"{out_folder}/{fout_temp.format(da)}"
step_timer.start()
with h5file(fout_path, "w") as fout:
dset_chi2 = fout.create_dataset("chi2map", data=np.transpose(chi2ndf_map))
dset_gmap_fit = fout.create_dataset("gainMap_fit", data=np.transpose(g0_map))
dset_std = fout.create_dataset("sigmamap", data=np.transpose(sigma_map))
dset_alpha = fout.create_dataset("alphamap", data=np.transpose(alpha_map))
dset_noi = fout.create_dataset(
"noise_map",
data=const_data[da]["Noise10Hz"])
fout.attrs["memory_cells"] = memory_cells
fout.attrs["integration_time"] = integration_time
fout.attrs["bias_voltage"] = bias_voltage
fout.attrs["gain_setting"] = gain_setting
fout.attrs["gain_mode"] = gain_mode
fout.attrs["creation_time"] = str(creation_time)
fout.attrs["karabo_id"] = karabo_id
step_timer.done_step("Saved fit results")
```
......
%% Cell type:markdown id: tags:
# Create Gain Map
Author: European XFEL Detector Group, Version: 1.0
Converts the map of fitted photon-peak positions into a gain conversion factor map.
Calculates the bad pixels map according to:
- Failed fit (flagged as FF_GAIN_EVAL_ERROR)
- Not enough entries in the fit (flagged as FF_NO_ENTRIES)
- Gain value deviation too high (flagged as FF_GAIN_DEVIATION)
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/MID/202330/p900322/raw' # RAW data path, required
fit_data_folder = "/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps" # Output path for gain data, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
runs = [94] # run number(s) of the high-gain data
karabo_id = 'MID_EXP_JF500K2' # karabo prefix of Jungfrau devices
karabo_da = [''] # Detector's data aggregators. Leave empty to derive from the run folder.
fit_func = 'CHARGE_SHARING' # function used to fit the single photon peak. The 3 available options are: CHARGE_SHARING, CHARGE_SHARING_2, and GAUSS
gains = [0, 1, 2]
# Parameter conditions
bias_voltage = -1 # detector bias voltage - Set to -1 to derive from the fit file attributes.
integration_time = -1. # integration time - Set to -1 to derive from the fit file attributes.
gain_setting = -1 # gain setting parameter conditions (High or low CDS) - Set to -1 to derive from the fit file attributes.
gain_mode = -1 # gain mode - Set to -1 to derive from the fit file attributes.
memory_cells = -1 # number of memory cells - Set to -1 to derive from the fit file attributes.
# Condition limits
bias_voltage_lims = [0, 200] # Acceptable deviations for bias voltages operating conditions.
integration_time_lims = [0.1, 1000] # Acceptable deviations for integration time operating conditions.
adc_fit = True # True: derive medium/low gain from the old map's gain ratios; False: scale the old map by the fitted G0 ratio.
strixel_sensor = "" # Reorder to the strixel detector layout; leave empty to disable. Possible options are A0123 and A1256.
spectra_fit_temp = 'R{:04d}_{}_Gain_Spectra_{}_{}_Fit.h5' # 'R{:04d}_{proposal.upper()}_Gain_Spectra_{da}_{fit_func}_Fit.h5'
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
g_map_old = '' # '/gpfs/exfel/data/user/mramilli/jungfrau/module_PSI_gainmaps/M302/gainMaps_M302_2022-02-23.h5' # old gain map file path to calculate gain ratios G0/G1 and G1/G2. Set to "" to get last gain constant from the database.
old_gain_dataset_name = 'gain_map_g0' # name of the data structure in the old gain map
correct_offset = False # correct the photon peak value with a pedestal fit position
db_output = False # A boolean to inject the gain maps to the database.
local_output = True # A boolean to store a local version of the constants in the out-folder
g0_fit_dataset = 'gainMap_fit' # name of the data structure in the fit files
E_ph = 8.048 # photon energy (keV) of the fitted peak
badpixel_threshold_sigma = 3. # number of std in gain distribution to mark bad pixels
roi = [0, 1024, 0, 512] # ROI to consider to evaluate gain distribution (for bad pixels evaluation)
# CALCAT API parameters
cal_db_interface = "tcp://max-exfl-cal001:8020" # the database interface to use
cal_db_timeout = 180000
def find_das(in_folder, runs, karabo_da):
run = runs[0]
if (karabo_da is not None) and karabo_da != [""]:
return karabo_da
from pathlib import Path
import re
karabo_da = set()
for file in Path(in_folder, f'r{run:04d}').iterdir():
da = re.search(r'JNGFR\d{2}', file.name)
if da:
karabo_da.add(da.group())
return sorted(karabo_da)
```
%% Cell type:code id: tags:
``` python
from logging import warning
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Markdown, display
from extra_data import RunDirectory
from h5py import File as h5file
import cal_tools.restful_config as rest_cfg
from iCalibrationDB import Conditions, Constants
from XFELDetAna.plotting.heatmap import heatmapPlot
from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
from cal_tools.enums import BadPixels
from cal_tools.jungfrau.jfstrixel import from_strixel
from cal_tools.tools import (
calcat_creation_time,
get_report,
raw_data_location_string,
save_const_to_h5,
send_to_db,
)
%matplotlib inline
```
%% Cell type:markdown id: tags:
## Open the photon peak fit file
%% Cell type:code id: tags:
``` python
run = runs[0]
fit_data_folder = Path(fit_data_folder)
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = raw_data_location_string(proposal=proposal, runs=runs)
report = get_report(metadata_folder)
in_folder = Path(in_folder)
```
%% Cell type:code id: tags:
``` python
g0_new = dict()
for da in karabo_da:
with h5file(
fit_data_folder / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),
'r'
) as f:
        if strixel_sensor:
            # Pass the strixel kind to from_strixel, not to np.transpose
            # (which has no such argument).
            g0_new[da] = np.transpose(
                from_strixel(np.transpose(f[g0_fit_dataset][()]), kind=strixel_sensor))
else:
g0_new[da] = f[g0_fit_dataset][()]
print("Fit gain data shape:", g0_new[da].shape)
        if integration_time < 0:
            if 'integration_time' in f.attrs:
                integration_time = np.float32(f.attrs['integration_time'])
                print(f'Integration time: {integration_time}')
            else:
                print(f'Integration time not found! Using default value {integration_time}')
        if bias_voltage < 0:
            if 'bias_voltage' in f.attrs:
                bias_voltage = np.float32(f.attrs['bias_voltage'])
                print(f'Bias voltage: {bias_voltage}')
            else:
                print(f'Bias voltage not found! Using default value {bias_voltage}')
        if gain_setting < 0:
            if 'gain_setting' in f.attrs:
                gain_setting = bool(f.attrs['gain_setting'])
                print(f'Gain setting: {gain_setting}')
            else:
                print(f'Gain setting not found! Using default value {gain_setting}')
        if gain_mode < 0:
            if 'gain_mode' in f.attrs:
                gain_mode = bool(f.attrs['gain_mode'])
                print(f'Gain mode: {gain_mode}')
            else:
                print(f'Gain mode not found! Using default value {gain_mode}')
        if memory_cells < 0:
            if 'memory_cells' in f.attrs:
                memory_cells = f.attrs['memory_cells']
                print(f'Memory cells: {memory_cells}')
            else:
                print(f'Memory cells not found! Using default value {memory_cells}')
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:markdown id: tags:
## Plot Spectra fit map
%% Cell type:code id: tags:
``` python
for da in karabo_da:
for cell in range(g0_new[da].shape[-1]):
vmin, vmax = np.percentile(g0_new[da][..., cell], [5, 95])
fig = heatmapPlot(
np.swapaxes(g0_new[da][..., cell], 0, 1),
y_label="Row",
x_label="Column",
lut_label="Output [keV]",
title=f'Cell {cell} for module {da}',
aspect=1.,
log=False,
vmin=vmin,
vmax=vmax,
panel_top_low_lim=35.,
panel_top_high_lim=45.,
panel_side_low_lim=35.,
panel_side_high_lim=45.,
)
plt.show()
```
%% Cell type:markdown id: tags:
## Retrieve the old gain map
Currently only the high gain map can be generated directly. The new medium and low gain maps are derived from a previous gain map via the G0/G1 and G1/G2 ratios. Without an old gain map, the medium and low gain maps cannot be calculated at the moment.
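Concretely, with the per-pixel ratios $r_{10} = G_1^{\mathrm{old}} / G_0^{\mathrm{old}}$ and $r_{21} = G_2^{\mathrm{old}} / G_1^{\mathrm{old}}$ taken from the old map, the new maps are built from the fitted $G_0^{\mathrm{new}}$ (as implemented in the "Calculate gain" cell below):
$$G_1^{\mathrm{new}} = G_0^{\mathrm{new}} \, r_{10}, \qquad G_2^{\mathrm{new}} = G_1^{\mathrm{new}} \, r_{21} = G_0^{\mathrm{new}} \, r_{10} \, r_{21}$$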
%% Cell type:code id: tags:
``` python
jf_cal = JUNGFRAU_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
event_at=None,
modules=karabo_da,
memory_cells=memory_cells,
integration_time=integration_time,
gain_setting=gain_setting,
    gain_mode=0,  # Always retrieve dynamic RelativeGain constants.
client=rest_cfg.calibration_client(),
)
gmap_old = dict()
gmap_new = dict()
bad_pixel_maps = dict()
da_to_pdu = {}
for mod_info in jf_cal.physical_detector_units.values():
da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]
jf_metadata = jf_cal.metadata(calibrations=["RelativeGain10Hz"])
const_data = jf_cal.ndarray_map(metadata=jf_metadata)
jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)
for da in karabo_da:
if const_data[da].get("RelativeGain10Hz") is not None:
gmap_old[da] = np.array(
const_data[da]["RelativeGain10Hz"],
dtype=np.float32,
)
        print('Retrieved old gain map with shape:', gmap_old[da].shape)
    else:
        raise ValueError(
            f"No old RelativeGain10Hz constant found for {da}; it is required "
            "to derive the medium and low gain maps from the gain ratios.")
gmap_new[da] = np.zeros(
(
gmap_old[da].shape[0],
gmap_old[da].shape[1],
g0_new[da].shape[2],
len(gains)
),
dtype=np.float32)
bad_pixel_maps[da] = np.zeros(gmap_new[da].shape, np.uint32)
```
%% Cell type:code id: tags:
``` python
# Create masks for failed fit and no entry pixels
for da in karabo_da:
no_entries = np.logical_and(g0_new[da] < 0., g0_new[da] > -2.) # no entries in pixel is marked with -1. in the gain map
failed_fit = g0_new[da] < -999. # failed fit are marked with -1000. in the gain map
# replace placeholder values with nan
g0_new[da][no_entries] = np.nan
g0_new[da][failed_fit] = np.nan
no_entries = np.broadcast_to(np.expand_dims(no_entries, axis=len(no_entries.shape)), gmap_new[da].shape)
failed_fit = np.broadcast_to(np.expand_dims(failed_fit, axis=len(failed_fit.shape)), gmap_new[da].shape)
bad_pixel_maps[da][no_entries] |= BadPixels.FF_NO_ENTRIES.value
bad_pixel_maps[da][failed_fit] |= BadPixels.FF_GAIN_EVAL_ERROR.value
```
%% Cell type:code id: tags:
``` python
# Print a few example pixels flagged with a failed fit.
_n = 0
for da in karabo_da:
for col in range(10, 200):
for row in range(10, 200):
if bad_pixel_maps[da][col, row, 0, 0] == BadPixels.FF_GAIN_EVAL_ERROR.value and _n < 20:
print(f'Module {da}: Column-{col}, Row-{row}')
_n += 1
```
%% Cell type:markdown id: tags:
## Calculate gain
%% Cell type:code id: tags:
``` python
for da in karabo_da:
if correct_offset:
display(Markdown("#### Correct with better pedestal estimation"))
with h5file(
fit_data_folder / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),
'r'
) as fc:
corr = np.array(fc[g0_fit_dataset])
        if strixel_sensor:
            # Pass the strixel kind to from_strixel, not to np.transpose.
            corr = np.transpose(
                from_strixel(np.transpose(corr), kind=strixel_sensor))
g0_new[da] -= corr
# Calculate gain map in ADC units/keV
g0_new[da] = g0_new[da] / E_ph
    if adc_fit:
        # Calculate per-pixel gain ratios from the old gain map.
        g1g0 = gmap_old[da][..., 0, 1] / gmap_old[da][..., 0, 0]
        g2g1 = gmap_old[da][..., 0, 2] / gmap_old[da][..., 0, 1]
# Calculate new G1 and G2 values from gain ratios and G0 fit value
for cell in range(0, gmap_new[da].shape[2]):
gmap_new[da][..., cell, 0] = g0_new[da][..., cell]
gmap_new[da][..., cell, 1] = g0_new[da][..., cell] * g1g0
gmap_new[da][..., cell, 2] = g0_new[da][..., cell] * g1g0 * g2g1
else:
g_ratio = np.broadcast_to(np.expand_dims(g0_new[da], axis=3), gmap_old[da].shape)
gmap_new[da] = g_ratio * gmap_old[da]
```
%% Cell type:code id: tags:
``` python
# plotting the gain distribution per memory cell
# the vertical bars indicate the cuts
n_bins = 100
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
for da in karabo_da:
for cell in range(0, gmap_new[da].shape[2]):
x_low = np.nanmin(gmap_new[da][..., cell, 0])
x_up = np.nanmax(gmap_new[da][..., cell, 0])
#x_low = 20.
#x_up = 50.
x_bins = np.linspace(x_low, x_up, n_bins)
mdn = np.nanmedian(gmap_new[da][..., cell, 0])
std = np.nanstd(gmap_new[da][..., cell, 0])
_h, _e = np.histogram(gmap_new[da][..., cell, 0].ravel(), bins=x_bins)
_x = (_e[1:] + _e[:-1])/2.
f, a = plt.subplots()
a.plot(_x, _h, drawstyle='steps', linewidth=3)
a.axvline(x=mdn+badpixel_threshold_sigma*std, linestyle='--')
a.axvline(x=mdn-badpixel_threshold_sigma*std, linestyle='--')
a.grid(True)
textstr = '\n'.join((
r'$\mathrm{median}=%.2e$' % (mdn),
r'$\sigma=%.2e$' % (std)))
a.text(
0.05,
0.95,
textstr,
transform=a.transAxes,
fontsize=20,
verticalalignment='center',
bbox=props,
)
a.set_yscale('log')
a.set_xlabel('Gain [ADCu/keV]', fontsize=20)
plt.show()
```
%% Cell type:code id: tags:
``` python
def replace_with_median(data, bpix_map):
"""
replaces the bad pixels with the median value of the other pixels
args:
data (array, float): map in; with shape (column, row, cell, gain)
bpix_map (array, uint32): bad pixel map
returns the new map
"""
    # Work on a boolean copy so the caller's bad pixel flags are preserved.
    b_p_mask = (bpix_map > 0).astype(np.uint32)
mskd = np.ma.MaskedArray(
data, b_p_mask,
hard_mask=True,
keep_mask=False,
fill_value=np.nan
).filled()
median_value = np.nanmedian(mskd, axis=(0, 1))
for cell in range(median_value.shape[0]):
for gain in range(median_value.shape[1]):
mskd[..., cell, gain][bpix_map[..., cell, gain] > 0] = median_value[cell, gain]
return mskd
def eval_bpidx(d_in, badpixel_threshold_sigma=badpixel_threshold_sigma, roi=roi):
"""
evaluates the indexes of the pixels with high deviation from the median value
args:
* d_in (array, float): the map with the values; with shape (columns, rows, cells, gain)
* badpixel_threshold_sigma (float): number of std outside which a value indicates a bad pixel
* roi (int, tuple): ROI to use to calculate median and std: (col_min, col_max, row_min, row_max)
returns mask where True indicates bad pixel
"""
d_eval = d_in[roi[0]:roi[1], roi[2]:roi[3]]
mdn = np.nanmedian(d_eval, axis=(0, 1))[None, None, :, :]
std = np.nanstd(d_eval, axis=(0, 1))[None, None, :, :]
idx = (d_in > badpixel_threshold_sigma*std+mdn) | (d_in < (-badpixel_threshold_sigma)*std+mdn)
return idx
```
%% Cell type:code id: tags:
``` python
for da in karabo_da:
# Create map of bad pixels with too high gain deviation
idxs = eval_bpidx(gmap_new[da])
bad_pixel_maps[da][idxs] |= BadPixels.FF_GAIN_DEVIATION.value
# Replace all the bad pixels with the median gain value across the module
gmap_new[da] = replace_with_median(gmap_new[da], bad_pixel_maps[da])
```
%% Cell type:markdown id: tags:
### New gain map
#### Plot new gain map for the first gain and for each cell
%% Cell type:code id: tags:
``` python
for da in karabo_da:
for cell in range(gmap_new[da].shape[-2]):
vmin, vmax = np.percentile(gmap_new[da][..., cell, 0], [5, 95])
fig = heatmapPlot(
np.swapaxes(gmap_new[da][..., cell, 0], 0, 1),
y_label="Row",
x_label="Column",
lut_label="Output [keV]",
title=f'Cell {cell} for module {da}({da_to_pdu[da]})',
aspect=1.,
log=False,
vmin=vmin,
vmax=vmax,
panel_top_low_lim=25.,
panel_top_high_lim=45.,
panel_side_low_lim=25.,
panel_side_high_lim=45.,
)
plt.show()
```
%% Cell type:markdown id: tags:
## Save and Inject gain map
%% Cell type:code id: tags:
``` python
def create_constants(bad_pixels, gmap, gain_mode):
    constants = {
        "BadPixelsFF": bad_pixels,  # 512, 1024, 1, 3
        "RelativeGain": gmap.copy() if gain_mode else gmap,  # 512, 1024, 1, 3
    }
    if gain_mode:
        # For fixed gain, the medium and low gain values are stored negated.
        constants["RelativeGain"][..., 1:] *= -1
    return constants
for gain_mode in [0, 1]:
for da in karabo_da:
# Inject constants for fixed and adaptive gain maps.
conditions = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
for parm in conditions.parameters:
if parm.name == "Integration Time":
print('Setting integration time limits.')
parm.lower_deviation = integration_time - integration_time_lims[0]
parm.upper_deviation = integration_time_lims[1] - integration_time
elif parm.name == "Sensor Bias Voltage":
print('Setting bias voltage limits.')
parm.lower_deviation = bias_voltage - bias_voltage_lims[0]
parm.upper_deviation = bias_voltage_lims[1] - bias_voltage
constants = create_constants(
bad_pixels=bad_pixel_maps[da],
gmap=gmap_new[da],
gain_mode=gain_mode,
)
pdu = da_to_pdu[da]
for cname, cdata in constants.items():
if db_output:
print(f"Storing maps for {'fixed' if mode else 'adaptive'} gain mode.")
# Inject both fixed and adaptive gain maps.
for parm in conditions.parameters:
if parm.name == "Gain mode":
parm.value = mode
print(f"Storing maps for {'fixed' if gain_mode else 'adaptive'} gain mode.")
const = getattr(Constants.jungfrau, cname)()
const.data = cdata
send_to_db(
db_module=pdu,
karabo_id=karabo_id,
constant=const,
condition=conditions,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
)
            if local_output and gain_mode == 0:  # Store only adaptive gain maps locally.
                save_const_to_h5(
db_module=pdu,
karabo_id=karabo_id,
constant=getattr(Constants.jungfrau, cname)(),
condition=conditions,
data=cdata,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=fit_data_folder,
)
print(f"Calibration constant {cname} is stored locally at {fit_data_folder}.\n")
```
%% Cell type:markdown id: tags:
## Plot Gain map for the 1st cell and each gain
%% Cell type:code id: tags:
``` python
for da in karabo_da:
    for g in gains:  # high, medium, low
vmin, vmax = np.percentile(gmap_new[da][..., 0, g], [5, 95])
f_im = heatmapPlot(
np.swapaxes(gmap_new[da][..., 0, g], 0, 1),
title=f"1st cell of G{g} gain map for {da}({da_to_pdu[da]})",
y_label="Row",
x_label="Column",
lut_label="G{:01d}[ADCu/keV]".format(g),
aspect=1.,
vmin=vmin,
vmax=vmax,
)
plt.show()
```
......
%% Cell type:markdown id: tags:
# Jungfrau Spectra Fit Summary
Author: European XFEL Detector Department, Version: 1.0
Summary for plotting Spectra Fit results for Jungfrau FF histogram and fitting notebook
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202330/p900343/raw' # RAW data path, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/JF4M_SPB_gain/r67" # Output path for gain data, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate.
runs = [67]
# Parameters used to access raw data.
karabo_da = [] # list of data aggregators, which corresponds to different JF modules. This is only needed for the detectors of one module.
karabo_id = "SPB_IRDA_JF4M" # detector identifier.
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
fit_func = 'CHARGE_SHARING' # function used to fit the single photon peak
g0_fit_dataset = 'gainMap_fit' # name of the data structure in the fit files
spectra_fit_temp = 'R{:04d}_{}_Gain_Spectra_{}_{}_Fit.h5'
histo_temp = 'R{:04d}_{}_Gain_Spectra_{}_Histo.h5'
```
%% Cell type:code id: tags:
``` python
import math
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')
from h5py import File as h5file
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
matplotlib.use("agg")
%matplotlib inline
from cal_tools.plotting import init_jungfrau_geom
from cal_tools.restful_config import calibration_client
from cal_tools.calcat_interface import CalCatApi
```
%% Cell type:code id: tags:
``` python
expected_modules, geom = init_jungfrau_geom(karabo_id=karabo_id, karabo_da=karabo_da)
nmods = len(expected_modules)
```
%% Cell type:code id: tags:
``` python
calcat_client = calibration_client()
calcat = CalCatApi(client=calcat_client)
detector_id = calcat.detector(karabo_id)['id']
da_mapping = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
da_to_pdu = {k: v["physical_name"] for k, v in da_mapping.items()}
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
run = runs[0]  # TODO: extend to support multiple runs.
stacked_constants = np.full(geom.expected_data_shape, np.nan) # nmods, 512, 1024
for i, da in enumerate(da_to_pdu):
with h5file(
Path(out_folder) / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),
'r'
) as f:
# f[g0_fit_dataset] shape is 1024, 512, mem_cells
stacked_constants[i] = np.moveaxis(
np.mean(np.array(f[g0_fit_dataset]), axis=-1), 0, 1)
fig, ax = plt.subplots(figsize=(18, 10))
vmin, vmax = np.percentile(stacked_constants, [5, 95])
geom.plot_data_fast(
stacked_constants,
ax=ax,
vmin=vmin,
vmax=vmax,
colorbar={'shrink': 1, 'pad': 0.01},
)
ax.set_title(f'{karabo_id} - One photon peak position', size=18)
plt.show()
```
%% Cell type:markdown id: tags:
## Histogram data for all cells for each module
%% Cell type:code id: tags:
``` python
if nmods > 4:
fixed_cols = 4
row, col = math.ceil(nmods / fixed_cols), 4
else:
row, col = 1, nmods
fig, axs = plt.subplots(row, col, figsize=(20, 10)) # Adjust for spatial histograms
axs = axs.ravel()
for i, da in enumerate(da_to_pdu):
with h5file(
Path(out_folder) / histo_temp.format(run, proposal.upper(), da),
'r'
) as f:
histos = f["histos"][:]
for m in range(histos.shape[1]):
cell_hist = histos[m]
axs[i].plot(cell_hist.ravel(), label=f'Cell {m}')
axs[i].set_title(f"{da} ({da_to_pdu[da]})")
for i, ax in enumerate(axs):
if i > nmods-1:
ax.set_visible(False) # Hide unused subplots
# Create a legend for the whole figure
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="center right" if nmods > 4 else "upper right")
plt.tight_layout(pad=3)
plt.show()
```
......
%% Cell type:markdown id: tags:
# LPD Offset, Noise and Dead Pixels Characterization #
Author: M. Karnevskiy, S. Hauf
This notebook re-characterizes dark images to derive offset, noise and bad-pixel maps. All three types of constants are evaluated per pixel and per memory cell.
The notebook handles veto settings correctly, but note that if you veto cells you will not be able to use these offsets for runs with different veto settings - vetoed cells will have zero offset.
The evaluated calibration constants are stored locally and injected into the calibration database.
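At its core, the characterization computes per-pixel, per-cell statistics over trains. A minimal sketch with hypothetical toy input (the actual processing below additionally handles gain-bit splitting, cell-pattern selection and bad-pixel thresholding):
``` python
import numpy as np
# Toy dark data for one module: (frames, rows, cols) plus the memory
# cell ID of each frame (hypothetical input, 4 cells x 10 trains).
rng = np.random.default_rng(0)
dark_frames = rng.normal(1000, 5, size=(40, 8, 8)).astype(np.float32)
cell_ids = np.tile(np.arange(4), 10)
n_cells = 4
offset = np.zeros((8, 8, n_cells))
noise = np.zeros_like(offset)
for cc in range(n_cells):
    sel = dark_frames[cell_ids == cc]         # all frames of one memory cell
    offset[..., cc] = np.median(sel, axis=0)  # per-pixel offset
    noise[..., cc] = np.std(sel, axis=0)      # per-pixel noise
```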
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202304/p003338/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/kluyvert/lpd-darks-p3338-r133-134-135/" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 133 # run number in which high gain data was recorded
run_med = 134 # run number in which medium gain data was recorded
run_low = 135 # run number in which low gain data was recorded
run = 88 # run number for LPD_PARALLEL_GAIN, where all gain stages are recorded in one run
operation_mode = 'LPD_ADAPTIVE_GAIN' # LPD_ADAPTIVE_GAIN or LPD_PARALLEL_GAIN
karabo_id = "FXE_DET_LPD1M-1" # karabo id of the detector
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
source_name = "{}/DET/{}CH0:xtdf" # Source name for raw detector data - filled with karabo_id & module number
ctrl_src_template = "{}/COMP/FEM_MDL_COMP" # Control device source name template.
use_dir_creation_date = True # use the creation date of the directory for database time derivation
cal_db_interface = "tcp://max-exfl-cal001:8015#8025" # the database interface to use
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
capacitor_setting = 5 # capacitor_setting for which data was taken
mem_cells = 512 # number of memory cells used
bias_voltage = 250 # detector bias voltage
thresholds_offset_sigma = 3. # bad pixel relative threshold in terms of n sigma offset
thresholds_offset_hard = [400, 1500] # bad pixel hard threshold
thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise
thresholds_noise_hard = [1, 35] # bad pixel hard threshold
ntrains = 500 # maximum number of trains to use in each gain stage
skip_first_ntrains = 10 # Number of first trains to skip
min_trains = 370 # minimum number of trains needed for each gain stage
drop_last_frames_parallelgain = 1 # Discard last N frames of each gain stage in parallel gain mode
bad_gain_tolerance_parallelgain = 0.2 # Error if more than this proportion of pixels are in the wrong gain stage in parallel gain mode
# Parameters for plotting
skip_plots = False # exit after writing corrected files
high_res_badpix_3d = False # plot bad-pixel summary in high resolution
test_for_normality = False # perform normality test
inject_cell_order = 'auto' # Include memory cell order as part of the detector condition: auto/always/never
```
%% Cell type:code id: tags:
``` python
import copy
import multiprocessing
import warnings
from datetime import datetime
from functools import partial
from logging import warning
from pathlib import Path
warnings.filterwarnings('ignore')
import dateutil.parser
import matplotlib
import pasha as psh
import scipy.stats
from IPython.display import Latex, Markdown, display
matplotlib.use("agg")
import matplotlib.patches as patches
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from iCalibrationDB import Conditions, Constants, Detectors, Versions
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
from extra_data import RunDirectory, by_id
from cal_tools.enums import BadPixels
from cal_tools.lpdlib import (
make_cell_order_condition,
sort_dark_runs_by_gain,
)
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_notebook_name,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
parse_runs,
raw_data_location_string,
reorder_axes,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
```
%% Cell type:code id: tags:
``` python
gains = np.arange(3)
max_cells = mem_cells
cells = np.arange(max_cells)
gain_names = ['High', 'Medium', 'Low']
is_adaptive_gain = False
is_parallel_gain = False
if operation_mode == 'LPD_ADAPTIVE_GAIN':
is_adaptive_gain = True
# Check dark runs order and sort if needed.
run_nums = sort_dark_runs_by_gain(
raw_folder=in_folder,
runs=[run_high, run_med, run_low],
ctrl_src=ctrl_src_template.format(karabo_id))
elif operation_mode == 'LPD_PARALLEL_GAIN':
is_parallel_gain = True
run_nums = [run, run, run]
else:
raise ValueError('Invalid LPD operation mode')
for run_num in set(run_nums): # Avoid visiting identical runs more than once.
dc = RunDirectory(Path(in_folder, f'r{run_num:04d}'))
try:
gain_override = bool(dc[ctrl_src_template.format(karabo_id)].run_value('femAsicGainOverride'))
except KeyError:
# Data taken before August 2024 is missing femAsicGainOverride on the FEM
# control device, prevent using parallel gain mode on this data. There are
# only a very small number of parallel gain test runs from before that date.
if is_parallel_gain:
raise ValueError('Control device is lacking data to verify detector '
'is operating in parallel gain mode') from None
continue
if gain_override != is_parallel_gain:
gain_str = 'parallel gain' if gain_override else 'adaptive gain'
raise ValueError(f'Mismatch between chosen operation mode {operation_mode} '
f'and control data indicating {gain_str}') from None
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ['LPD{:02d}'.format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
capacitor_setting_s = f'{capacitor_setting}pf'
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_nums[0])
print("Using {} as creation time".format(creation_time))
if inject_cell_order not in {'auto', 'always', 'never'}:
raise ValueError("inject_cell_order must be auto/always/never")
run, prop, seq = run_prop_seq_from_path(in_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
display(Markdown('## Evaluated parameters'))
print('CalDB Interface {}'.format(cal_db_interface))
print("Proposal: {}".format(prop))
print("Memory cells: {}/{}".format(mem_cells, max_cells))
print("Run(s): {}".format(run_nums))
print("Using DB: {}".format(db_output))
print("Input: {}".format(in_folder))
print("Output: {}".format(out_folder))
print("Bias voltage: {}V".format(bias_voltage))
print(f"Capacitor setting: {capacitor_setting_s}")
print(f'Parallel gain: {is_parallel_gain}')
```
%% Cell type:markdown id: tags:
## Data processing
%% Cell type:code id: tags:
``` python
def find_common_cell_pattern(srcdata):
"""The memory cell pattern is not always entirely consistent (seen in p6936 r52, parallel gain mode)
Identify the most common cell pattern, and select data matching that.
"""
cell_ids = srcdata['image.cellId'].drop_empty_trains()[skip_first_ntrains:]
tids_by_cell_pattern = {}
# Count up cell ID patterns per train
for tid, cid_arr in cell_ids.trains():
if cid_arr.ndim > 1:
cid_arr = cid_arr[:, 0]
if is_parallel_gain:
if len(cid_arr) % 3 != 0:
raise ValueError(f"{len(cid_arr)} frames in train {tid} is not divisible by 3 "
"(expected for parallel gain mode)")
# Only the high-gain cell IDs are valid
cid_arr = cid_arr[:len(cid_arr) // 3]
if drop_last_frames_parallelgain:
cid_arr = cid_arr[:-drop_last_frames_parallelgain]
tids_by_cell_pattern.setdefault(tuple(cid_arr), []).append(tid)
most_common, sel_tids = max(tids_by_cell_pattern.items(), key=lambda p: len(p[1]))
    if len(sel_tids) <= (0.5 * len(cell_ids.train_ids)):
        raise ValueError("No cell ID pattern matched over half of the trains "
                         f"(max {len(sel_tids)} / {len(cell_ids.train_ids)})")
return np.array(most_common), sel_tids[:ntrains]
```
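%% Cell type:markdown id: tags:
A toy illustration of the majority vote above, with hypothetical train IDs and one deviating cell pattern:
``` python
import numpy as np
patterns = {
    10: np.array([0, 1, 2, 3]),
    11: np.array([0, 1, 2, 3]),
    12: np.array([0, 1, 2, 5]),  # train with a deviating pattern
}
tids_by_pattern = {}
for tid, cids in patterns.items():
    tids_by_pattern.setdefault(tuple(cids), []).append(tid)
most_common, sel_tids = max(tids_by_pattern.items(), key=lambda p: len(p[1]))
assert sel_tids == [10, 11] and most_common == (0, 1, 2, 3)
```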
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
# the actual characterization
def characterize_module(run_path, channel, gg):
run = RunDirectory(run_path, parallelize=False)
    det_source = run[source_name.format(karabo_id, channel)]
    cellid_pattern, train_ids = find_common_cell_pattern(det_source)
    data = det_source['image.data'][by_id[train_ids]]
    cell_ids = det_source['image.cellId'][by_id[train_ids]]
# If there is no data available, return and expect this
# module to be skipped later.
if len(data.train_ids) == 0:
return None, None, None, None, None, None, None, None
elif len(data.train_ids) < min_trains:
raise Exception(f"Run {run_path} only contains {len(data.train_ids)} trains, but {min_trains} required")
im = data.ndarray()
if im.ndim > 3:
im = im[:, 0] # Drop extra dimension
    cellid = cell_ids.ndarray()
    if cellid.ndim > 1:
        cellid = cellid[:, 0]
# Split off gain bits from pixel data.
gains = (im & 0x3000)
gains >>= 12
im &= 0b0000111111111111
im = im.astype(np.float32)
if is_parallel_gain:
# Frames in the transition region between the gain stages
# often have some pixels in the previous gain stage.
# The proper way would be to use np.median, but this is
# significantly more expensive and .mean().round() should
# give the same result.
gain_by_frame = gains.mean(axis=(1, 2)).round().astype(np.int32)
num_frames_by_gain = np.bincount(gain_by_frame)
if not np.all(num_frames_by_gain == num_frames_by_gain[0]):
raise ValueError(f'Unequal number of frames per gain stage '
f'in parallel gain: {num_frames_by_gain}')
elif len(num_frames_by_gain) != 3:
raise ValueError(f'Parallel gain contains pixels in {len(num_frames_by_gain)} '
f'gain stages, expected 3')
        # By this point the parallel gain assumption is valid.
# (trains, gains, frames)
frame_indexes = np.arange(len(cellid)).reshape(len(train_ids), 3, -1)
# The last frame in each gain stage is often bad, so we may drop it
if drop_last_frames_parallelgain:
frame_indexes = frame_indexes[..., :-drop_last_frames_parallelgain]
# Cell IDs from high gain, data from the current gain stage
cellid = cellid[frame_indexes[:, 0].ravel()]
gains = gains[frame_indexes[:, gg].ravel()]
im = im[frame_indexes[:, gg].ravel()]
# Discard frames with too many pixels in the wrong gain stage,
# and check we still have data for each cell ID.
INVALID_CELLID = 9999
wrong_gain_fraction = (gains != gg).mean(axis=(1, 2)) # fraction per frame
cellid[wrong_gain_fraction > bad_gain_tolerance_parallelgain] = INVALID_CELLID
cellids_missing = cellid_pattern[~np.isin(cellid_pattern, cellid)]
if len(cellids_missing):
raise ValueError(f"No frames left for cells {cellids_missing} "
"after discarding frames with wrong gain stage")
elif (gains != gg).any():
raise Exception(f"Adaptive gain run {run_path} contains pixels "
f"in gain state other than expected {gg}")
im = reorder_axes(im,
from_order=('frames', 'slow_scan', 'fast_scan'),
to_order=('fast_scan', 'slow_scan', 'frames'),
)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], max_cells), dtype=np.float64)
noise = context.alloc(like=offset)
normal_test = context.alloc(like=offset)
def process_cell(worker_id, array_index, cc):
idx = cellid == cc
im_slice = im[..., idx]
if np.any(idx):
offset[..., cc] = np.median(im_slice, axis=2)
noise[..., cc] = np.std(im_slice, axis=2)
if test_for_normality:
_, normal_test[..., cc] = scipy.stats.normaltest(
im[:, :, idx], axis=2)
    context.map(process_cell, cellid_pattern)
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0, 1))
offset_std = np.nanstd(offset, axis=(0, 1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (
offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0, 1))
noise_std = np.nanstd(noise, axis=(0, 1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (
noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
    idx = (cellid == cellid_pattern[0])
return offset, noise, channel, gg, bp, im[12, 12, idx], normal_test, cellid_pattern
```
%% Cell type:code id: tags:
``` python
offset_g = {}
noise_g = {}
badpix_g = {}
data_g = {}
ntest_g = {}
# Should be the same cell order for all modules & all gain stages
cellid_patterns_g = {}
inp = []
for gg, run_num in enumerate(run_nums):
run_path = Path(in_folder, f"r{run_num:04d}")
for i in modules:
inp.append((run_path, i, gg))
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(characterize_module, inp)
for ir, r in enumerate(results):
offset, noise, i, gg, bp, data, normal, cellid_pattern = r
if data is None:
warning(f"No data for module {i} of gain {gg}")
skip_plots = True
continue
qm = module_index_to_qm(i)
if qm not in offset_g:
offset_g[qm] = np.zeros(offset.shape[:3] + (3,))
print("Constant shape:", offset_g[qm].shape)
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], dtype=np.uint32)
data_g[qm] = np.full((ntrains, 3), np.nan)
ntest_g[qm] = np.zeros_like(offset_g[qm])
cellid_patterns_g[qm] = cellid_pattern
else:
if not np.array_equal(cellid_pattern, cellid_patterns_g[qm]):
raise ValueError("Inconsistent cell ID pattern between gain stages")
offset_g[qm][..., gg] = offset
noise_g[qm][..., gg] = noise
badpix_g[qm][..., gg] = bp
data_g[qm][:data.shape[0], gg] = data
ntest_g[qm][..., gg] = normal
print(f"{gain_names[gg]} gain, Module: {qm}. "
f"Number of processed trains per cell: {data.shape[0]}.")
```
%% Cell type:code id: tags:
``` python
if skip_plots:
import sys
print('Skipping plots')
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = raw_data_location_string(proposal, run_nums)
report = get_report(metadata_folder)
```
%% Cell type:code id: tags:
``` python
# TODO: add db_module when received from myMDC
# Create the modules dict of karabo_das and PDUs
qm_dict = {}
for i, k_da in zip(modules, karabo_da):
qm = module_index_to_qm(i)
qm_dict[qm] = {"karabo_da": k_da,
"db_module": ""}
```
%% Cell type:code id: tags:
``` python
from iCalibrationDB import OperatingCondition
def parallel_gain_parameter(condition):
if is_parallel_gain:
pgc = OperatingCondition()
pgc.name = 'Parallel gain'
pgc.description = ''
pgc.lower_deviation = 0
pgc.upper_deviation = 0
pgc.value = 1
condition.add_operating_condition(pgc)
```
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise", "BadPixelsDark"]
print('Retrieve pre-existing constants for comparison.')
old_const = {}
old_mdata = {}
for qm in offset_g.keys():
old_const[qm] = {}
old_mdata[qm] = {}
qm_db = qm_dict[qm]
karabo_da = qm_db["karabo_da"]
cellid_pattern = cellid_patterns_g[qm]
condition = Conditions.Dark.LPD(
memory_cells=max_cells,
bias_voltage=bias_voltage,
capacitor=capacitor_setting_s,
memory_cell_order=make_cell_order_condition(inject_cell_order, cellid_pattern),
)
parallel_gain_parameter(condition)
for const in clist:
constant = getattr(Constants.LPD, const)()
if not qm_db["db_module"]:
# This should be used in case of running notebook
# by a different method other than myMDC which already
# sends CalCat info.
qm_db["db_module"] = get_pdu_from_db(karabo_id, [karabo_da], constant,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
data, mdata = get_from_db(karabo_id, karabo_da,
constant,
condition, None,
cal_db_interface,
creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[qm][const] = data
if mdata is None or data is None:
old_mdata[qm][const] = {
"timestamp": "Not found",
"filepath": None,
"h5path": None
}
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = Path(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
old_mdata[qm][const] = {
"timestamp": timestamp,
"filepath": str(filepath),
"h5path": h5path
}
with open(f"{out_folder}/module_metadata_{qm}.yml","w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_db["db_module"],
"old-constants": old_mdata[qm]
}, fd)
```
%% Cell type:code id: tags:
``` python
res = {}
for i in modules:
qm = module_index_to_qm(i)
res[qm] = {'Offset': offset_g[qm],
'Noise': noise_g[qm],
'BadPixelsDark': badpix_g[qm]
}
```
%% Cell type:code id: tags:
``` python
# Save constants in the calibration DB
md = None
for qm in res:
karabo_da = qm_dict[qm]["karabo_da"]
db_module = qm_dict[qm]["db_module"]
mem_cell_order = make_cell_order_condition(
inject_cell_order, cellid_patterns_g[qm]
)
print("Memory cell order:", mem_cell_order)
# Do not store empty constants
# In case of 0 trains data_g is initiated with nans and never refilled.
if np.count_nonzero(~np.isnan(data_g[qm]))==0:
print(f"Constant ({qm}) would be empty, skipping saving")
continue
for const in res[qm]:
dconst = getattr(Constants.LPD, const)()
dconst.data = res[qm][const]
# set the operating condition
condition = Conditions.Dark.LPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
capacitor=capacitor_setting_s,
memory_cell_order=mem_cell_order,
)
parallel_gain_parameter(condition)
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition,
file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition,
dconst.data, file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} is stored locally.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n"
f"• bias_voltage: {bias_voltage}\n"
f"• capacitor: {capacitor_setting_s}\n"
f"• memory cell order: {mem_cell_order}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
show_processed_modules(
karabo_id,
constants=None,
mnames=[module_index_to_qm(i) for i in modules],
mode="position"
)
```
%% Cell type:markdown id: tags:
## Raw pedestal distribution ##
Distribution of the pedestal (ADU) over trains for pixel (12,12), memory cell 12. The median of the distribution is shown in yellow, the standard deviation in red. The green line shows the average over all pixels for a given memory cell and gain stage.
%% Cell type:code id: tags:
``` python
fig, grid = plt.subplots(3, 1, sharex="col", sharey="row", figsize=(10, 7))
fig.subplots_adjust(wspace=0, hspace=0)
for i in modules:
qm = module_index_to_qm(i)
if np.count_nonzero(~np.isnan(data_g[qm])) == 0:
break
for gain in range(3):
data = data_g[qm][:, gain]
offset = np.nanmedian(data)
noise = np.nanstd(data)
xrange = [np.nanmin(data_g[qm]), np.nanmax(data_g[qm])]
if xrange[1] == xrange[0]:
xrange = [0, xrange[0]+xrange[0]//2]
nbins = data_g[qm].shape[0]
else:
nbins = int(xrange[1] - xrange[0])
hn, cn = np.histogram(data, bins=nbins, range=xrange)
grid[gain].hist(data, range=xrange, bins=nbins)
grid[gain].plot([offset-noise, offset-noise], [0, np.nanmax(hn)],
linewidth=1.5, color='red',
label='1 $\sigma$ deviation')
grid[gain].plot([offset+noise, offset+noise],
[0, np.nanmax(hn)], linewidth=1.5, color='red')
grid[gain].plot([offset, offset], [0, 0],
linewidth=1.5, color='y', label='median')
grid[gain].plot([np.nanmedian(offset_g[qm][:, :, 12, gain]),
np.nanmedian(offset_g[qm][:, :, 12, gain])],
[0, np.nanmax(hn)], linewidth=1.5, color='green',
label='average over pixels')
grid[gain].set_xlim(xrange)
grid[gain].set_ylim(0, np.nanmax(hn)*1.1)
grid[gain].set_xlabel("Offset value [ADU]")
    grid[gain].set_ylabel("# of occurrences")
if gain == 0:
leg = grid[gain].legend(
loc='upper center', ncol=3,
bbox_to_anchor=(0.1, 0.25, 0.7, 1.0))
grid[gain].text(820, np.nanmax(hn)*0.4,
"{} gain".format(gain_names[gain]), fontsize=20)
a = plt.axes([.125, .1, 0.775, .8], frame_on=False)
a.patch.set_alpha(0.05)
a.set_xlim(xrange)
plt.plot([offset, offset], [0, 1], linewidth=1.5, color='y')
plt.xticks([])
plt.yticks([])
ypos = 0.9
x1pos = (np.nanmedian(data_g[qm][:, 0]) +
np.nanmedian(data_g[qm][:, 2]))/2.
x2pos = (np.nanmedian(data_g[qm][:, 2]) +
np.nanmedian(data_g[qm][:, 1]))/2.-10
plt.annotate("", xy=(np.nanmedian(data_g[qm][:, 0]), ypos), xycoords='data',
xytext=(np.nanmedian(data_g[qm][:, 2]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_g[qm][:, 0])-np.nanmedian(data_g[qm][:, 2])),
xy=(x1pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.annotate("", xy=(np.nanmedian(data_g[qm][:, 2]), ypos), xycoords='data',
xytext=(np.nanmedian(data_g[qm][:, 1]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_g[qm][:, 2])-np.nanmedian(data_g[qm][:, 1])),
xy=(x2pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.show()
```
%% Cell type:markdown id: tags:
## Normality test ##
Distributions of raw pedestal values are tested for normality. The test is performed for each pixel and each memory cell. The plots below show a histogram of p-values and a 2D distribution for memory cell 12.
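A small self-contained example of the per-pixel test (`scipy.stats.normaltest`, the D'Agostino-Pearson test used below), assuming toy normally distributed pedestal values:
``` python
import numpy as np
import scipy.stats
rng = np.random.default_rng(1)
samples = rng.normal(1000, 5, size=(500, 4, 4))  # (trains, rows, cols)
_, pvals = scipy.stats.normaltest(samples, axis=0)  # p-value per pixel
print((pvals < 0.05).mean())  # fraction of pixels failing at the 5% level
```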
%% Cell type:code id: tags:
``` python
# Loop over modules, constants
if not test_for_normality:
print('Normality test was not requested. Flag `test_for_normality` False')
else:
for i in modules:
qm = module_index_to_qm(i)
data = np.copy(ntest_g[qm][:,:,:,:])
data[badpix_g[qm][:,:,:,:]>0] = 1.01
d = [{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,0], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'High gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,1], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Medium gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,2], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Low gain',
},
]
fig = plt.figure(figsize=(15,15), tight_layout={'pad': 0.5, 'w_pad': 0.3})
for gain in range(3):
ax = fig.add_subplot(221+gain)
heatmapPlot(data[:,:,12,gain], add_panels=False, cmap='viridis', figsize=(10,10),
y_label='Rows', x_label='Columns',
lut_label='p-Value',
use_axis=ax,
title='p-Value for cell 12, {} gain'.format(gain_names[gain]) )
ax = fig.add_subplot(224)
        _ = simplePlot(d, #aspect=1.6,
                       x_label="p-Value",
                       y_label="# of occurrences",
                       use_axis=ax,
                       y_log=False, legend='outside-top-ncol3-frame', legend_pad=0.05, legend_size='5%')
ax.ticklabel_format(style='sci', axis='y', scilimits=(4,6))
```
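For readers unfamiliar with the test, the maps above can be reproduced in miniature. The following is a self-contained sketch; the array name `raw_pedestals`, its shape, and the frame axis are assumptions for illustration, not the notebook's actual variables:

``` python
# Hedged sketch: per-pixel normality p-values via the D'Agostino-Pearson
# test, one p-value per pixel, analogous to the ntest_g maps plotted above.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
# (frames, rows, columns): many dark frames of a small dummy sensor region
raw_pedestals = rng.normal(loc=5000, scale=6, size=(500, 64, 64))

# Testing along the frame axis yields one p-value per pixel.
_, pvals = stats.normaltest(raw_pedestals, axis=0)
print(pvals.shape, float(np.median(pvals)))
```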
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identifying potential effects common to all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:code id: tags:
``` python
cell = 12
for gain in range(3):
display(
Markdown('### Cell-12 overview - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(18, 22) , tight_layout={'pad': 0.1, 'w_pad': 0.1})
for qm in res:
for iconst, const in enumerate(['Offset', 'Noise', 'BadPixelsDark']):
ax = fig.add_subplot(321+iconst)
data = res[qm][const][:, :, 12, gain]
vmax = 1.5 * np.nanmedian(res[qm][const][:, :, 12, gain])
label = '{} value [ADU]'.format(const)
title = '{} value'.format(const)
if const == 'BadPixelsDark':
vmax = 4
bpix_code = data.astype(np.float32)
bpix_code[bpix_code == 0] = np.nan
title = 'Bad pixel code'
label = title
cb_labels = ['1 {}'.format(BadPixels.NOISE_OUT_OF_THRESHOLD.name),
'2 {}'.format(BadPixels.OFFSET_NOISE_EVAL_ERROR.name),
'3 {}'.format(BadPixels.OFFSET_OUT_OF_THRESHOLD.name),
'4 {}'.format('MIXED')]
heatmapPlot(bpix_code, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label='', vmax=vmax,
use_axis=ax, cb_ticklabels=cb_labels, cb_ticks = np.arange(4)+1,
title='{}'.format(title))
del bpix_code
else:
heatmapPlot(data, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label=label, vmax=vmax,
use_axis=ax,
title='{}'.format(title))
for qm in res:
for iconst, const in enumerate(['Offset', 'Noise']):
data = res[qm][const]
dataBP = np.copy(data)
dataBP[res[qm]['BadPixelsDark'] > 0] = -1
x_ranges = [[0, 1500], [0, 40]]
hn, cn = np.histogram(
data[:, :, :, gain], bins=100, range=x_ranges[iconst])
hnBP, cnBP = np.histogram(dataBP[:, :, :, gain], bins=cn)
d = [{'x': cn[:-1],
'y': hn,
'drawstyle': 'steps-pre',
'label': 'All data',
},
{'x': cnBP[:-1],
'y': hnBP,
'drawstyle': 'steps-pre',
'label': 'Bad pixels masked',
},
]
ax = fig.add_subplot(325+iconst)
_ = simplePlot(d, figsize=(5, 7), aspect=1,
x_label="{} value [ADU]".format(const),
y_label="# of occurance",
title='', legend_pad=0.1, legend_size='10%',
use_axis=ax,
y_log=True, legend='outside-top-2col-frame')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of a bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are re-binned with a factor of 2.
This excludes isolated single bad pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# Switch rebin to 1 for full resolution and
# no interpolation for badpixel values.
rebin = 2
for gain in range(3):
display(Markdown('### Bad pixel behaviour - {} gain ###'.format(gain_names[gain])))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title='', rebin_fac=rebin)
ax = plt.gca()
leg = ax.get_legend()
leg.set(alpha=0.5)
plt.show()
```
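The rebinning logic is easiest to see on a toy mask. Below is a minimal sketch assuming an AND-style 2×2 reduction; the actual `plot_badpix_3d` implementation may combine blocks differently:

``` python
# Minimal sketch (assumption: AND-style 2x2 reduction; plot_badpix_3d may
# differ) of how rebinning suppresses isolated single bad pixels while
# keeping clustered ones.
import numpy as np

bp = np.zeros((32, 128), dtype=np.uint32)
bp[10, 10] = 1          # isolated pixel: vanishes after rebinning
bp[20:22, 40:42] = 2    # 2x2 cluster: survives

rebin = 2
blocks = bp.reshape(bp.shape[0] // rebin, rebin, bp.shape[1] // rebin, rebin)
rebinned = np.bitwise_and.reduce(np.bitwise_and.reduce(blocks, axis=3), axis=1)
print(np.count_nonzero(rebinned))  # -> 1 (only the cluster remains)
```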
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary across tiles ##
Plots give an overview of calibration constants averaged across tiles. A bad pixel mask is applied. Constants are compared with pre-existing constants retrieved from the calibration database. Differences $\Delta$ between the old and new constants are shown.
%% Cell type:code id: tags:
``` python
time_summary = []
time_summary.append(f"The following pre-existing constants are used for comparison:")
for qm, qm_data in old_mdata.items():
time_summary.append(f"- Module {qm}")
for const, const_data in qm_data.items():
time_summary.append(f" - {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
%% Cell type:code id: tags:
``` python
# Loop over modules, constants
for qm in res:
for gain in range(3):
display(Markdown('### Summary across tiles - {} gain'.format(gain_names[gain])))
for const in res[qm]:
data = np.copy(res[qm][const][:, :, :, gain])
label = 'Fraction of bad pixels'
if const != 'BadPixelsDark':
data[badpix_g[qm][:, :, :, gain] > 0] = np.nan
label = '{} value [ADU]'.format(const)
else:
data[data>0] = 1.0
data = data.reshape(
int(data.shape[0] / 32),
32,
int(data.shape[1] / 128),
128,
data.shape[2])
data = np.nanmean(data, axis=(1, 3)).swapaxes(
0, 2).reshape(512, 16)
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(121)
_ = heatmapPlot(data[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(16)+1,
x_ticks=np.arange(16)+0.5)
if old_const[qm][const] is not None:
ax = fig.add_subplot(122)
dataold = np.copy(old_const[qm][const][:, :, :, gain])
label = r'$\Delta$ {}'.format(label)
if const != 'BadPixelsDark':
if old_const[qm]['BadPixelsDark'] is not None:
dataold[old_const[qm]['BadPixelsDark'][:, :, :, gain] > 0] = np.nan
else:
dataold[:] = np.nan
else:
dataold[dataold>0]=1.0
dataold = dataold.reshape(
int(dataold.shape[0] / 32),
32,
int(dataold.shape[1] / 128),
128,
dataold.shape[2])
dataold = np.nanmean(dataold, axis=(
1, 3)).swapaxes(0, 2).reshape(512, 16)
dataold = dataold - data
_ = heatmapPlot(dataold[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(16)+1,
x_ticks=np.arange(16)+0.5)
plt.show()
```
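The reshape-and-average step above is compact but easy to misread; here is the same operation on a dummy constant, assuming the supermodule geometry used in this notebook (256×256 pixels, 512 memory cells, 16 tiles of 32×128):

``` python
# Sketch of the tile averaging: collapse each 32x128 tile to its mean,
# then arrange the result as (memory cell, tile).
import numpy as np

const = np.random.default_rng(0).normal(size=(256, 256, 512))
tiles = const.reshape(256 // 32, 32, 256 // 128, 128, 512)
per_tile = np.nanmean(tiles, axis=(1, 3))            # -> (8, 2, 512)
per_tile = per_tile.swapaxes(0, 2).reshape(512, 16)  # -> (cell, tile)
print(per_tile.shape)
```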
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Variation of offset and noise across Tiles and ASICs ##
The following plots show the standard deviation $\sigma$ of the calibration constants. The plot of the standard deviation across tiles shows the pixels of one tile ($128 \times 32$); the value of each pixel is the standard deviation across the 16 tiles. The standard deviation across ASICs is shown over all tiles: the plot shows the pixels of one ASIC ($16 \times 32$), where each value is the standard deviation across all ASICs of the module.
%% Cell type:code id: tags:
``` python
# Loop over modules, constants
for qm in res:
for gain in range(3):
display(Markdown('### Variation of offset and noise across ASICs - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(res[qm][const][:, :, :, gain])
data[badpix_g[qm][:, :, :, gain] > 0] = np.nan
label = r'$\sigma$ {} [ADU]'.format(const)
dataA = np.nanmean(data, axis=2) # average over cells
dataA = dataA.reshape(8, 32, 16, 16)
dataA = np.nanstd(dataA, axis=(0, 2)) # average across ASICs
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataA, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis'
)
plt.show()
```
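For reference, the ASIC-variation reshape can be checked on a dummy plane; the sketch assumes the 256×256 module plane and the 8×16 grid of 32×16 ASICs implied by the code above:

``` python
# Sketch of the across-ASIC standard deviation: fold the module plane into
# an ASIC grid and take the spread across that grid, leaving one ASIC map.
import numpy as np

plane = np.random.default_rng(0).normal(size=(256, 256))
asics = plane.reshape(8, 32, 16, 16)        # (asic_row, row, asic_col, col)
sigma_asic = np.nanstd(asics, axis=(0, 2))  # -> (32, 16), one ASIC
print(sigma_asic.shape)
```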
%% Cell type:code id: tags:
``` python
# Loop over modules, constants
for qm in res:
for gain in range(3):
display(Markdown('### Variation of offset and noise across tiles - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(res[qm][const][:, :, :, gain])
data[badpix_g[qm][:, :, :, gain] > 0] = np.nan
label = r'$\sigma$ {} [ADU]'.format(const)
dataT = data.reshape(
int(data.shape[0] / 32),
32,
int(data.shape[1] / 128),
128,
data.shape[2])
dataT = np.nanstd(dataT, axis=(0, 2))
dataT = np.nanmean(dataT, axis=2)
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataT, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Aggregate values and per cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behaviour, averaged across pixels.
%% Cell type:code id: tags:
``` python
# Loop over modules, constants
for qm in res:
for gain in range(3):
display(Markdown('### Mean over pixels - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(9,11))
for iconst, const in enumerate(res[qm]):
ax = fig.add_subplot(311+iconst)
data = res[qm][const][:,:,:510,gain]
if const == 'BadPixelsDark':
data[data>0] = 1.0
dataBP = np.copy(data)
dataBP[badpix_g[qm][:,:,:510,gain]>0] = -10
data = np.nanmean(data, axis=(0,1))
dataBP = np.nanmean(dataBP, axis=(0,1))
d = [{'y': data,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'All data'
}
]
if const != 'BadPixelsDark':
d.append({'y': dataBP,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'good pixels only'
})
y_title = "{} value [ADU]".format(const)
title = "{} value, {} gain".format(const, gain_names[gain])
else:
y_title = "Fraction of Bad Pixels"
title = "Fraction of Bad Pixels, {} gain".format(gain_names[gain])
data_min = np.min([data, dataBP]) if const != 'BadPixelsDark' else np.min([data])
data_max = np.max([data[20:], dataBP[20:]])
data_dif = data_max - data_min
local_max = np.max([data[200:300], dataBP[200:300]])
frac = 0.35
new_max = (local_max - data_min*(1-frac))/frac
new_max = np.max([data_max, new_max])
_ = simplePlot(d, figsize=(10,10), aspect=2, xrange=(-12, 510),
x_label = 'Memory Cell ID',
y_label=y_title, use_axis=ax,
title=title,
title_position=[0.5, 1.15],
inset='xy-coord-right', inset_x_range=(0,20), inset_indicated=True,
inset_labeled=True, inset_coord=[0.2,0.5,0.6,0.95],
inset_lw = 1.0, y_range = [data_min-data_dif*0.05, new_max+data_dif*0.05],
y_log=False, legend='outside-top-ncol2-frame', legend_size='18%',
legend_pad=0.00)
plt.tight_layout(pad=1.08, h_pad=0.35)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
table = []
bits = [
BadPixels.NOISE_OUT_OF_THRESHOLD,
BadPixels.OFFSET_OUT_OF_THRESHOLD,
BadPixels.OFFSET_NOISE_EVAL_ERROR
]
for qm in res:
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(res[qm]['BadPixelsDark'][:,:,:,gain])
l_data.append(len(data[data>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit.value))
if old_const[qm]['BadPixelsDark'] is not None:
old_const[qm]['BadPixelsDark'] = old_const[qm]['BadPixelsDark'].astype(np.uint32)
dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
l_data_old.append(len(dataold[dataold>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit.value))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma}', f'{thresholds_offset_sigma}',
f'{thresholds_offset_hard}/{thresholds_noise_hard}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, gain {gain_names[gain]}', l_threshold[i], l_data[i]]
if old_const[qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels ###
One pixel can be bad for several reasons; therefore, the sum over all bad pixel types can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
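The double counting noted above follows directly from the bitmask encoding; a small self-contained illustration (the bit values here are made up, the real ones come from `BadPixels`):

``` python
# Each bad-pixel cause sets its own bit, so one pixel can appear in several
# per-cause counts at once.
import numpy as np

NOISE_OUT_OF_THRESHOLD = 1 << 0    # illustrative bit values
OFFSET_OUT_OF_THRESHOLD = 1 << 1

mask = np.array([0,
                 NOISE_OUT_OF_THRESHOLD,
                 NOISE_OUT_OF_THRESHOLD | OFFSET_OUT_OF_THRESHOLD])
print(np.count_nonzero(mask))                            # 2 bad pixels total
print(np.count_nonzero(mask & NOISE_OUT_OF_THRESHOLD))   # 2 counted here
print(np.count_nonzero(mask & OFFSET_OUT_OF_THRESHOLD))  # 1 counted here
```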
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
for qm in res:
data = np.copy(res[qm][const])
data[res[qm]['BadPixelsDark']>0] = np.nan
if old_const[qm][const] is not None and old_const[qm]['BadPixelsDark'] is not None :
dataold = np.copy(old_const[qm][const])
dataold[old_const[qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
for gain in range(3):
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[qm][const] is not None and old_const[qm]['BadPixelsDark'] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202401/p005436/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/kluyvert/lpd-corr-p5436-r167" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 167 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '{karabo_id}/CORR/{module_index}CH0:output' # Output fast data source, empty to use same as input.
control_source = '{karabo_id}/COMP/FEM_MDL_COMP' # Control data source.
xgm_source = 'SA1_XTD2_XGM/DOOCS/MAIN'
xgm_pulse_count_key = 'pulseEnergy.numberOfSa1BunchesActual'
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = '' # Not needed, compatibility with current webservice.
cal_db_timeout = 0 # Not needed, compatibility with current webservice.
cal_db_root = '/gpfs/exfel/d/cal/caldb_store' # The calibration database root path to access constant files. For example accessing constants from the test database.
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = 'auto' # Whether to use memory cell order as a detector condition; auto/always/never
# Correction parameters
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.
combine_parallel_gain = True # Combine parallel gain images into a single frame.
threshold_sigma_high = 5.0 # Sigma level for threshold between high and medium gain.
threshold_sigma_mid = 100.0 # Sigma level for threshold between medium and low gain.
drop_last_frames_parallelgain = 1 # Discard last N frames of each gain stage in parallel gain mode
# Output options
ignore_no_frames_no_pulses = False # Whether to run without SA1 pulses AND frames.
overwrite = True # set to True if existing data should be overwritten
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.
create_virtual_cxi_in = '' # Folder to create virtual CXI files in (for each sequence).
# Parallelization options
sequences_per_node = 1 # Sequence files to process per node
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.
num_threads_per_worker = 32 # Number of threads per worker.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
from logging import warning
from pathlib import Path
from time import perf_counter
from warnings import warn
import gc
import re
import numpy as np
import h5py
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
import extra_data as xd
import extra_geom as xg
import pasha as psh
from extra_data.components import LPD1M
from cal_tools.calcat_interface2 import CalibrationData, LPDConditions
import cal_tools.restful_config as rest_cfg
from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.lpdlib import get_mem_cell_pattern, make_cell_order_condition
from cal_tools.tools import (
calcat_creation_time,
write_constants_fragment_extracal,
)
from cal_tools.files import DataFile
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
``` python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$') # This should probably move to cal_tools
run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
output_source = output_source or input_source
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if karabo_da == ['']:
if modules == [-1]:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
do_sequence = lambda seq: True
else:
do_sequence = [int(x) for x in sequences].__contains__
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
if use_cell_order not in {'auto', 'always', 'never'}:
raise ValueError("use_cell_order must be auto/always/never")
```
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
``` python
data_to_process = []
for inp_path in run_folder.glob('RAW-*.h5'):
match = file_re.match(inp_path.stem)
if match[2] not in karabo_da or not do_sequence(int(match[3])):
continue
outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
run=int(match[1]), aggregator=match[2], seq=int(match[3]))
data_to_process.append((match[2], inp_path, outp_path))
print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[0]}{x[1]}'):
print(f'{data_descr[0]}\t{data_descr[1]}')
# Collect the train ID contained in the input LPD files.
inp_lpd_dc = xd.DataCollection.from_paths([x[1] for x in data_to_process])
frame_count = sum([
int(inp_lpd_dc[source, 'image.data'].data_counts(labelled=False).sum())
for source in inp_lpd_dc.all_sources], 0)
if frame_count == 0:
inp_dc = xd.RunDirectory(run_folder) \
.select_trains(xd.by_id[inp_lpd_dc.train_ids])
try:
pulse_count = int(inp_dc[xgm_source, xgm_pulse_count_key].ndarray().sum())
except xd.SourceNameError:
warning(f'Missing XGM source `{xgm_source}`')
warn(f'Missing XGM source `{xgm_source}`')
pulse_count = None
except xd.PropertyNameError:
warning(f'Missing XGM pulse count key `{xgm_pulse_count_key}`')
warn(f'Missing XGM pulse count key `{xgm_pulse_count_key}`')
pulse_count = None
if pulse_count == 0 and not ignore_no_frames_no_pulses:
warning(f'Affected files contain neither LPD frames nor SA1 pulses '
        f'according to {xgm_source}, processing is skipped. If this '
        f'is incorrect, please contact da-support@xfel.eu')
warn(f'Affected files contain neither LPD frames nor SA1 pulses '
     f'according to {xgm_source}, processing is skipped. If this '
     f'is incorrect, please contact da-support@xfel.eu')
from sys import exit
exit(0)
elif pulse_count is None:
raise ValueError('Affected files contain no LPD frames and SA1 pulses '
'could not be inferred from XGM data')
else:
raise ValueError('Affected files contain no LPD frames but SA1 pulses')
else:
print(f'Total number of LPD frames across all modules: {frame_count}')
```
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
``` python
start = perf_counter()
raw_data = xd.RunDirectory(run_folder)
try:
parallel_gain = bool(raw_data[control_source.format(karabo_id=karabo_id)].run_value('femAsicGainOverride'))
except KeyError:
warn('Missing femAsicGainOverride property on FEM control device, assuming auto gain')
parallel_gain = False
print('Parallel gain mode:', parallel_gain)
cell_ids_pattern_s = None
if use_cell_order != 'never':
mem_cell_pattern = get_mem_cell_pattern(raw_data, det_inp_sources)
if parallel_gain:
mem_cell_pattern = mem_cell_pattern[:len(mem_cell_pattern) // 3]
if drop_last_frames_parallelgain:
mem_cell_pattern = mem_cell_pattern[:-drop_last_frames_parallelgain]
# Read the order of memory cells used
raw_data = xd.DataCollection.from_paths([e[1] for e in data_to_process])
cell_ids_pattern_s = make_cell_order_condition(
use_cell_order, get_mem_cell_pattern(raw_data, det_inp_sources)
)
cell_ids_pattern_s = make_cell_order_condition(use_cell_order, mem_cell_pattern)
print("Memory cells order:", cell_ids_pattern_s)
conditions = LPDConditions(
sensor_bias_voltage=bias_voltage,
memory_cells=mem_cells,
feedback_capacitor=capacitor,
source_energy=photon_energy,
memory_cell_order=cell_ids_pattern_s,
parallel_gain=parallel_gain,
category=category,
)
expected_constants = {'Offset', 'BadPixelsDark'}
if rel_gain:
expected_constants.add('RelativeGain')
if ff_map:
expected_constants.update(['FFMap', 'BadPixelsFF'])
if gain_amp_map:
expected_constants.add('GainAmpMap')
if parallel_gain and combine_parallel_gain:
expected_constants.add('Noise')
lpd_consts = CalibrationData.from_condition(
conditions,
calibrations=expected_constants,
detector_name=karabo_id,
event_at=creation_time,
client=rest_cfg.extra_calibration_client(),
).select_modules(
aggregator_names=karabo_da
).require_calibrations(
['Offset']
)
total_time = perf_counter() - start
print(f'Looking up constants {total_time:.1f}s')
lpd_consts.summary_table()
```
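Beyond `summary_table()`, the returned object supports mapping-style access; a usage sketch (not executed by this notebook, and requiring CalCat/GPFS access):

``` python
# lpd_consts behaves as a mapping: constant type -> MultiModuleConstant,
# which in turn maps module identifiers -> SingleConstant.
offset = lpd_consts['Offset']
print(offset.aggregator_names)             # e.g. ['LPD00', 'LPD01', ...]
arr = offset[offset.aggregator_names[0]].ndarray(cal_db_root)
print(arr.shape)
```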
%% Cell type:code id: tags:
``` python
# Validate the constants availability and raise/warn accordingly.
if not lpd_consts.aggregator_names: # Offset was required above
raise Exception("Could not find offset constants for any modules, will not correct data.")
for mod in karabo_da.copy():
if mod not in lpd_consts["Offset"].aggregator_names:
warning(f"Offset constant is not available to correct {mod}.")
warn(f"Offset constant is not available to correct {mod}.")
karabo_da.remove(mod)
missing_constants = {c for c in expected_constants
if (c not in lpd_consts) or (mod not in lpd_consts[c].aggregator_names)}
if missing_constants:
warning(f"Constants {sorted(missing_constants)} were not retrieved for {mod}.")
warn(f"Constants {sorted(missing_constants)} were not retrieved for {mod}.")
# Remove skipped correction modules from data_to_process
data_to_process = [(mod, in_f, out_f) for mod, in_f, out_f in data_to_process if mod in karabo_da]
```
%% Cell type:code id: tags:
``` python
# write constants metadata to fragment YAML
write_constants_fragment_extracal(
out_folder=(metadata_folder or out_folder),
calib_data=lpd_consts,
caldb_root=cal_db_root,
)
# Load constants data for all constants
start = perf_counter()
const_data = {kda: {} for kda in lpd_consts.aggregator_names}
for cname, multimodconst in lpd_consts.items():
arr = multimodconst.ndarray(cal_db_root, parallel=8)
for i, kda in enumerate(multimodconst.aggregator_names):
const_data[kda][cname] = arr[i]
total_time = perf_counter() - start
print(f'Loading constants {total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
# These are intended in order cell, X, Y, gain
ccv_offsets = {}
ccv_noise = {}
ccv_gains = {}
ccv_masks = {}
ccv_shape = (mem_cells, 256, 256, 3)
constant_order = {
'Offset': (2, 1, 0, 3),
'Noise': (2, 1, 0, 3),
'BadPixelsDark': (2, 1, 0, 3),
'RelativeGain': (2, 0, 1, 3),
'FFMap': (2, 0, 1, 3),
'BadPixelsFF': (2, 0, 1, 3),
'GainAmpMap': (2, 0, 1, 3),
}
def prepare_constants(wid, index, aggregator):
consts = const_data.get(aggregator, {})
def _prepare_data(calibration_name, dtype):
# Some old BadPixels constants have <f8 dtype.
# Convert nan to float 0 to avoid having 2147483648 after
# converting float64 to uint32.
if "BadPixels" in calibration_name and consts[calibration_name].dtype != np.uint32:
consts[calibration_name] = np.nan_to_num(
consts[calibration_name], nan=0.0)
return consts[calibration_name] \
.transpose(constant_order[calibration_name]) \
.astype(dtype, copy=True) # Make sure array is contiguous.
if offset_corr and 'Offset' in consts:
ccv_offsets[aggregator] = _prepare_data('Offset', np.float32)
else:
ccv_offsets[aggregator] = np.zeros(ccv_shape, dtype=np.float32)
ccv_gains[aggregator] = np.ones(ccv_shape, dtype=np.float32)
if parallel_gain and combine_parallel_gain:
if 'Noise' in consts:
ccv_noise[aggregator] = _prepare_data('Noise', np.float32)
else:
raise RuntimeError('parallel gain combination requires an available Noise constant')
else:
ccv_noise[aggregator] = None
if 'BadPixelsDark' in consts:
ccv_masks[aggregator] = _prepare_data('BadPixelsDark', np.uint32)
else:
ccv_masks[aggregator] = np.zeros(ccv_shape, dtype=np.uint32)
if 'RelativeGain' in consts:
ccv_gains[aggregator] *= _prepare_data('RelativeGain', np.float32)
if 'FFMap' in consts:
ccv_gains[aggregator] *= _prepare_data('FFMap', np.float32)
if 'BadPixelsFF' in consts:
np.bitwise_or(ccv_masks[aggregator], _prepare_data('BadPixelsFF', np.uint32),
out=ccv_masks[aggregator])
if 'GainAmpMap' in consts:
ccv_gains[aggregator] *= _prepare_data('GainAmpMap', np.float32)
print('.', end='', flush=True)
print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
print(f'Preparing constants {total_time:.1f}s')
const_data.clear() # Clear raw constants data now to save memory.
gc.collect();
```
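The `constant_order` transposes are worth a second look. Below is a minimal sketch of the axis shuffling done in `_prepare_data`, using the Offset ordering `(2, 1, 0, 3)`; the stored layouts themselves are whatever CalCat provides, encoded in the table above:

``` python
# A stored constant with the cell axis third is rearranged to the
# (cell, x, y, gain) layout expected by the correction kernel.
import numpy as np

stored = np.zeros((256, 256, 512, 3), dtype=np.float32)
kernel_ready = stored.transpose((2, 1, 0, 3)).astype(np.float32, copy=True)
print(kernel_ready.shape)  # (512, 256, 256, 3)
```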
%% Cell type:code id: tags:
``` python
def correct_file(wid, index, work):
aggregator, inp_path, outp_path = work
module_index = int(aggregator[-2:])
start = perf_counter()
dc = xd.H5File(inp_path, inc_suspect_trains=False).select('*', 'image.*', require_all=True)
inp_source_name = input_source.format(karabo_id=karabo_id, module_index=module_index)
inp_source = dc[inp_source_name]
open_time = perf_counter() - start
# Load raw data for this file.
# Reshaping gets rid of the extra 1-len dimensions without
# mangling the frame axis for an actual frame count of 1.
start = perf_counter()
in_raw = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
frame_counts = inp_source['image.data'].data_counts(labelled=False).astype(np.int32)
read_time = perf_counter() - start
parallel_gain_indices = None
if parallel_gain:
assert (frame_counts % 3 == 0).all(), 'frame count not divisible by 3 in parallel gain mode'
actual_frame_counts = frame_counts // 3 - drop_last_frames_parallelgain
if combine_parallel_gain:
# Indices map where to find each of the high/medium/low gain images for each actual
# frame event.
parallel_gain_indices = np.zeros((actual_frame_counts.sum(), 3), dtype=np.int32)
cursor_in = cursor_out = 0
for afc in actual_frame_counts:
for gg in range(3):
indices = np.arange(cursor_in, cursor_in+afc)
parallel_gain_indices[cursor_out : cursor_out+afc, gg] = indices
# Skip over N (default 1) trailing frames in each gain stage
cursor_in += afc + drop_last_frames_parallelgain
cursor_out += afc
assert parallel_gain_indices.max() < in_raw.shape[0], 'gain image indices exceed raw data size'
# Pick cell and pulse IDs from high gain. This is also done if frames are not combined
# in order to correct corrupt tables in medium and low gain, and if needed brought back
# to the original shape further below.
in_cell = np.take(in_cell, parallel_gain_indices[:, 0])
in_pulse = np.take(in_pulse, parallel_gain_indices[:, 0])
# Replace supposed frame counts by actual frame counts.
frame_counts = actual_frame_counts
else:
# Parallel gain, but kept separate in output
# Replicate corrected cell and pulse IDs from high gain to other gains.
tmp_cell = np.zeros_like(in_cell)
tmp_pulse = np.zeros_like(in_pulse)
sel = np.zeros(in_cell.shape, dtype=bool)  # boolean mask of frames to keep
cursor_in = cursor_out = 0
for afc in actual_frame_counts:
for gg in range(3):
tmp_cell[cursor_out : cursor_out+afc] = in_cell[cursor_in : cursor_in+afc]
tmp_pulse[cursor_out : cursor_out+afc] = in_pulse[cursor_in : cursor_in+afc]
sel[cursor_out : cursor_out+afc] = True
cursor_out += afc + drop_last_frames_parallelgain
cursor_in += afc + drop_last_frames_parallelgain
# Apply the selection to drop trailing frames
in_cell = tmp_cell[sel]
in_pulse = tmp_pulse[sel]
in_raw = in_raw[sel]
frame_counts -= (3 * drop_last_frames_parallelgain)
# Disable gain indices to not combine.
parallel_gain_indices = None
# Allocate output arrays.
out_data = np.zeros((in_raw.shape[0], 256, 256), dtype=np.float32)
out_gain = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint8)
out_mask = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint32)
num_frames = frame_counts.sum()
out_data = np.zeros((num_frames, 256, 256), dtype=np.float32)
out_gain = np.zeros((num_frames, 256, 256), dtype=np.uint8)
out_mask = np.zeros((num_frames, 256, 256), dtype=np.uint32)
start = perf_counter()
correct_lpd_frames(in_raw, in_cell,
out_data, out_gain, out_mask,
ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
num_threads=num_threads_per_worker)
ccv_offsets[aggregator], ccv_noise[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
parallel_gain_indices, threshold_sigma_high, threshold_sigma_mid,
num_threads=16)
correct_time = perf_counter() - start
image_counts = inp_source['image.data'].data_counts(labelled=False)
start = perf_counter()
if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
if (not outp_path.exists() or overwrite) and num_frames > 0:
outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)
with DataFile(outp_path, 'w') as outp_file:
outp_file.create_index(dc.train_ids, from_file=dc.files[0])
outp_file.create_metadata(like=dc, instrument_channels=sorted({
f'{outp_source_name}/image', f'{inp_source_name}/image'
}))
outp_source = outp_file.create_instrument_source(outp_source_name)
outp_source.create_index(image=image_counts)
outp_source.create_index(image=frame_counts)
outp_source.create_key('image.cellId', data=in_cell,
chunks=(min(chunks_ids, in_cell.shape[0]),))
outp_source.create_key('image.pulseId', data=in_pulse,
chunks=(min(chunks_ids, in_pulse.shape[0]),))
outp_source.create_key('image.data', data=out_data,
chunks=(min(chunks_data, out_data.shape[0]), 256, 256))
outp_source.create_compressed_key('image.gain', data=out_gain)
outp_source.create_compressed_key('image.mask', data=out_mask)
if output_source != input_source:
outp_file.create_legacy_source(inp_source_name, outp_source_name)
write_time = perf_counter() - start
total_time = open_time + read_time + correct_time + write_time
frame_rate = in_raw.shape[0] / total_time
frame_rate = num_frames / total_time
print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
in_raw.shape[0], frame_rate))
num_frames, frame_rate))
worker_frame_counts[wid] += num_frames
in_raw = None
in_cell = None
in_pulse = None
out_data = None
out_gain = None
out_mask = None
gc.collect()
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
ctx = psh.ProcessContext(num_workers=num_workers)
worker_frame_counts = ctx.alloc(shape=(), dtype=np.int32, per_worker=True)
start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
ctx.map(correct_file, data_to_process)
total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
total_frames = worker_frame_counts.sum()
print(f'Total time: {total_time:.1f}s, Mean rate: {(total_frames / total_time):.1f}s⁻¹')
```
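The parallel-gain index map built inside `correct_file` is the trickiest part; here is a toy run with one train, assuming 3 gain stages stored back to back and `drop_last_frames_parallelgain = 1`:

``` python
# With 6 raw frames per train (2 per gain stage, 1 of which is dropped),
# the single surviving output frame points at raw frames 0/2/4 for
# high/medium/low gain respectively.
import numpy as np

drop_last = 1
frame_counts = np.array([6], dtype=np.int32)
actual = frame_counts // 3 - drop_last          # -> [1]

indices = np.zeros((actual.sum(), 3), dtype=np.int32)
cursor_in = cursor_out = 0
for afc in actual:
    for gg in range(3):
        indices[cursor_out:cursor_out + afc, gg] = np.arange(cursor_in, cursor_in + afc)
        cursor_in += afc + drop_last
    cursor_out += afc
print(indices)  # [[0 2 4]]
```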
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
``` python
geom = xg.LPD_1MGeometry.from_quad_positions(
[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])
output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
if not output_paths:
warning('Data preview is skipped as there are no existing output paths')
warn('Data preview is skipped as there are no existing output paths')
from sys import exit
exit(0)
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])
det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
data = det.get_array('image.data', unstack_pulses=False)
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
``` python
left_edge_ratio = 0.01
right_edge_ratio = 0.99
fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=2000, range=(-1500, 2000))
def find_nearest_index(array, value):
return (np.abs(array - value)).argmin()
cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]
max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
color='red', rotation=90, ha='left', va='center', size='x-large')
ax.set_xlim(vmin-(vmax-vmin)*0.1, vmax+(vmax-vmin)*0.1)
ax.set_ylim(0, max_value*1.1)
pass
```
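The cumulative-histogram scan above is essentially a percentile lookup; an equivalent shortcut (up to binning effects), shown on dummy values:

``` python
import numpy as np

values = np.random.default_rng(0).normal(size=100_000)
vmin, vmax = np.nanquantile(values, [0.01, 0.99])
print(f'{vmin:.2f} .. {vmax:.2f}')
```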
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
geom.plot_data_fast(data[:, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
geom.plot_data_fast(data.mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
``` python
highest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))
highest_gain_stage = det.get_array('image.gain', unstack_pulses=False).max(axis=1)
fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(highest_gain_stage, ax=ax, vmin=0, vmax=2);
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
%% Cell type:markdown id: tags:
### Create virtual CXI file
%% Cell type:code id: tags:
``` python
if create_virtual_cxi_in:
if create_virtual_cxi_in and not (parallel_gain and not combine_parallel_gain):
vcxi_folder = Path(create_virtual_cxi_in.format(
run=run, proposal_folder=str(Path(in_folder).parent)))
vcxi_folder.mkdir(parents=True, exist_ok=True)
def sort_files_by_seq(by_seq, outp_path):
by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
return by_seq
from functools import reduce
reduce(sort_files_by_seq, output_paths, output_by_seq := {})
for seq_number, seq_output_paths in output_by_seq.items():
# Create data collection and detector components only for this sequence.
try:
det = LPD1M(xd.DataCollection.from_paths(seq_output_paths), detector_name=karabo_id, min_modules=4)
except ValueError: # Couldn't find enough data for min_modules
continue
det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')
```
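The `reduce()` call above is simply a grouping by sequence number; for readers puzzled by it, an equivalent plain-loop version operating on the same `output_paths`:

``` python
# Group corrected files by their 5-digit sequence number (taken from the
# file stem), exactly as sort_files_by_seq does via reduce().
output_by_seq = {}
for p in output_paths:
    output_by_seq.setdefault(int(p.stem[-5:]), []).append(p)
```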
......
......@@ -61,7 +61,7 @@ install_requires = [
"dill==0.3.8",
"docutils==0.20.1",
"dynaconf==3.2.4",
"dynflatfield==1.0.0",
"dynflatfield==1.1.0",
"env_cache==0.1",
"extra_data==1.16.0",
"extra_geom==1.11.0",
......
......@@ -160,6 +160,11 @@ global_client = None
def get_client():
"""Get the global CalCat API client.
The default assumes we're running in the DESY network; this is used unless
`setup_client()` has been called to specify otherwise.
"""
global global_client
if global_client is None:
setup_client(CALCAT_PROXY_URL, None, None, None)
......@@ -177,6 +182,7 @@ def setup_client(
oauth_timeout=12,
ssl_verify=True,
):
"""Configure the global CalCat API client."""
global global_client
if client_id is not None:
oauth_client = Oauth2ClientBackend(
......@@ -279,6 +285,7 @@ class SingleConstant:
return f[self.dataset]["data"]
def ndarray(self, caldb_root=None):
"""Load the constant data as a Numpy array"""
return self.dataset_obj(caldb_root)[:]
def _load_calcat_metadata(self, client=None):
......@@ -340,7 +347,13 @@ def prepare_selection(
@dataclass
class MultiModuleConstant(Mapping):
"""A group of similar constants for several modules of one detector"""
"""A group of similar constants for several modules of one detector.
This works as a mapping holding `SingleConstant` objects.
Keys can be module numbers (`offset[0]`), data aggregator names
(`offset['LPD00']`), QxMy names (`offset['Q1M1']`) or Physical Detector Unit
(PDU) names.
"""
constants: Dict[str, SingleConstant] # Keys e.g. 'LPD00'
module_details: List[Dict]
......@@ -382,6 +395,10 @@ class MultiModuleConstant(Mapping):
def select_modules(
self, module_nums=None, *, aggregator_names=None, qm_names=None
) -> "MultiModuleConstant":
"""Return a new `MultiModuleConstant` object with only the selected modules
One of `module_nums`, `aggregator_names` or `qm_names` must be specified.
"""
aggs = prepare_selection(
self.module_details, module_nums, aggregator_names, qm_names
)
......@@ -393,10 +410,12 @@ class MultiModuleConstant(Mapping):
# be a subset of what's in module_details
@property
def aggregator_names(self):
"Data aggregator names for the modules where we have this constant"
return sorted(self.constants)
@property
def module_nums(self):
"Module numbers for the modules where we have this constant"
return [
m["module_number"]
for m in self.module_details
......@@ -405,6 +424,7 @@ class MultiModuleConstant(Mapping):
@property
def qm_names(self):
"Names like Q1M3 for the modules where we have this constant, if applicable"
return [
m["virtual_device_name"]
for m in self.module_details
......@@ -413,6 +433,9 @@ class MultiModuleConstant(Mapping):
@property
def pdu_names(self):
"""Names of the specific detector units making up the detector.
Only includes modules where we have this constant."""
return [
m["physical_name"]
for m in self.module_details
......@@ -420,6 +443,11 @@ class MultiModuleConstant(Mapping):
]
def ndarray(self, caldb_root=None, *, parallel=0):
"""Load this constant as a Numpy array.
If `parallel` is specified, the per-module constants are loaded in
parallel using N worker processes.
"""
eg_dset = self.constants[self.aggregator_names[0]].dataset_obj(caldb_root)
shape = (len(self.constants),) + eg_dset.shape
......@@ -438,6 +466,14 @@ class MultiModuleConstant(Mapping):
return arr
def xarray(self, module_naming="modnum", caldb_root=None, *, parallel=0):
"""Load this constant as an xarray DataArray.
`module_naming` may be "modnum", "aggregator" or "qm" to use different
styles of labelling for the modules dimension.
If `parallel` is specified, the per-module constants are loaded in
parallel using N worker processes.
"""
import xarray
if module_naming == "aggregator":
......@@ -448,7 +484,7 @@ class MultiModuleConstant(Mapping):
modules = self.qm_names
else:
raise ValueError(
f"{module_naming=} (must be 'aggregator', 'modnum' or 'qm'"
f"{module_naming=} (must be 'aggregator', 'modnum' or 'qm')"
)
ndarr = self.ndarray(caldb_root, parallel=parallel)
......@@ -462,7 +498,12 @@ class MultiModuleConstant(Mapping):
class CalibrationData(Mapping):
"""Collected constants for a given detector"""
"""Collected constants for a given detector
This can represent multiple constant types (offset, gain, bad pixels, etc.)
across multiple modules. It works as a mapping keyed by constant type
(e.g. `cd["Offset"]`), giving you `MultiModuleConstant` objects.
"""
def __init__(self, constant_groups, module_details, detector_name):
# {calibration: {karabo_da: SingleConstant}}
......@@ -504,6 +545,11 @@ class CalibrationData(Mapping):
pdu_snapshot_at=None,
begin_at_strategy="closest",
):
"""Look up constants for the given detector conditions & timestamp.
`condition` should be a conditions object for the relevant detector type,
e.g. `DSSCConditions`.
"""
accepted_strategies = ["closest", "prior"]
if begin_at_strategy not in accepted_strategies:
raise ValueError(
......@@ -573,6 +619,12 @@ class CalibrationData(Mapping):
report_id_or_path: Union[int, str],
client=None,
):
"""Look up constants by a report ID or path.
Constants produced together in the same characterisation are grouped
in CalCat by their report. This method accepts either the integer report
ID or the full filesystem path of the report.
"""
client = client or get_client()
# Use max page size, hopefully always enough for CCVs from 1 report
......@@ -654,21 +706,27 @@ class CalibrationData(Mapping):
# the detector (at the specified time).
@property
def module_nums(self):
"Module numbers in the detector. May include missing modules."
return [m["module_number"] for m in self.module_details]
@property
def aggregator_names(self):
"Data aggregator names for modules. May include missing modules."
return [m["karabo_da"] for m in self.module_details]
@property
def qm_names(self):
"Module names like Q1M3, if present. May include missing modules."
return [m["virtual_device_name"] for m in self.module_details]
@property
def pdu_names(self):
"""Names of the specific detector units making up the detector.
May include missing modules."""
return [m["physical_name"] for m in self.module_details]
def require_calibrations(self, calibrations):
def require_calibrations(self, calibrations) -> "CalibrationData":
"""Drop any modules missing the specified constant types"""
mods = set(self.aggregator_names)
for cal_type in calibrations:
......@@ -678,6 +736,10 @@ class CalibrationData(Mapping):
def select_modules(
self, module_nums=None, *, aggregator_names=None, qm_names=None
) -> "CalibrationData":
"""Return a new `CalibrationData` object with only the selected modules
One of `module_nums`, `aggregator_names` or `qm_names` must be specified.
"""
# Validate the specified modules against those we know about.
# Each specific constant type may have only a subset of these modules.
aggs = prepare_selection(
......@@ -696,10 +758,17 @@ class CalibrationData(Mapping):
return type(self)(constant_groups, module_details, self.detector_name)
def select_calibrations(self, calibrations) -> "CalibrationData":
"""Return a new `CalibrationData` object with only the selected constant types"""
const_groups = {c: self.constant_groups[c] for c in calibrations}
return type(self)(const_groups, self.module_details, self.detector_name)
def merge(self, *others: "CalibrationData") -> "CalibrationData":
"""Combine two or more `CalibrationData` objects for the same detector.
Where the inputs have different constant types or different modules,
the output will include all of them (set union). Where they overlap,
later inputs override earlier ones.
"""
det_names = set(cd.detector_name for cd in (self,) + others)
if len(det_names) > 1:
raise Exception(
......@@ -741,12 +810,15 @@ class CalibrationData(Mapping):
return type(self)(constant_groups, module_details, det_name)
def summary_table(self, module_naming="modnum"):
"""Make a markdown table overview of the constants found.
"""Make a table overview of the constants found.
Columns are calibration types, rows are modules.
If there are >4 calibrations, the table will be split up into several
pieces with up to 4 calibrations in each.
The returned table(s) render correctly within Jupyter notebooks, including when converted to LaTeX & PDF.
Args:
module_naming (str): modnum, aggregator or qm, to change how the
modules are labelled in the table. Defaults to modnum.
......@@ -822,7 +894,7 @@ class CalibrationData(Mapping):
modules = self.qm_names
else:
raise ValueError(
f"{module_naming=} (must be 'aggregator', 'modnum' or 'qm'"
f"{module_naming=} (must be 'aggregator', 'modnum' or 'qm')"
)
cal_groups = [
......@@ -863,16 +935,13 @@ class CalibrationData(Mapping):
return '\n\n'.join(md_tables)
def display_markdown_table(self, module_naming="modnum"):
"""Make a markdown table overview of the constants found.
"""Display a table of the constants found (in a Jupyter notebook).
Columns are calibration types, rows are modules.
If there are >4 calibrations, the table will be split up into several
pieces with up to 4 calibrations in each.
Args:
ccvs_url (str, optional): URL for calibration constant versions.
Defaults to
"https://in.xfel.eu/calibration/calibration_constant_versions/".
module_naming (str): modnum, aggregator or qm, to change how the
modules are labelled in the table. Defaults to modnum.
"""
......@@ -898,6 +967,7 @@ class ConditionsBase:
@dataclass
class AGIPDConditions(ConditionsBase):
"""Conditions for AGIPD detectors"""
sensor_bias_voltage: float
memory_cells: int
acquisition_rate: float
......@@ -962,10 +1032,9 @@ class LPDConditions(ConditionsBase):
"Pixels X",
"Pixels Y",
"Feedback capacitor",
"Parallel gain",
]
_dark_parameters = _base_params + [
"Memory cell order",
"Memory cell order", "Parallel gain"
]
_illuminated_parameters = _base_params + ["Source Energy", "category"]
......@@ -992,6 +1061,7 @@ class LPDConditions(ConditionsBase):
@dataclass
class DSSCConditions(ConditionsBase):
"""Conditions for DSSC detectors"""
sensor_bias_voltage: float
memory_cells: int
pulse_id_checksum: Optional[float] = None
......@@ -1023,9 +1093,9 @@ class JUNGFRAUConditions(ConditionsBase):
sensor_bias_voltage: float
memory_cells: int
integration_time: float
exposure_timeout: int
gain_setting: int
gain_mode: Optional[int] = None
exposure_timeout: int = 25
sensor_temperature: float = 291
pixels_x: int = 1024
pixels_y: int = 512
......
......@@ -3,6 +3,16 @@ from pathlib import Path
from typing import List, Union
import extra_data
import numpy as np
def validate_sensor_type(sensor_type):
choices = ["25um", "50um"]
if sensor_type and sensor_type not in choices:
raise ValueError(
f"Unexpected Gotthard2 `sensor_type` given: '{sensor_type}'. "
f"Valid options are: {', '.join(choices)}."
)
def map_da_to_source(dc, das, source_template, karabo_id, affixes):
......@@ -63,16 +73,29 @@ class Gotthard2Ctrl():
return bool(
self.run_dc[self.ctrl_src, "singlePhoton"].as_single_value())
def get_det_type(self):
def get_sensor_type(self):
"""GH2 rxHostname is a vector of bytes objects.
GH2 25um has two host names unlike 50um which has one.
Returns:
str: return if its a 25um or 50um detector.
"""
hostname = self.run_dc.get_run_value(self.ctrl_src, "rxHostname")
return "25um" if hostname[1].decode("utf-8") else "50um"
hostnames = self.run_dc.get_run_value(self.ctrl_src, "rxHostname")
num_hosts = np.count_nonzero(hostnames != b'')
if num_hosts == 0:
raise ValueError(
"No hosts are present in RUN/rxHostname. "
"Unable to determine the sensor type for this data. ")
if num_hosts == 1:
return "50um"
elif num_hosts == 2:
return "25um"
else:
raise ValueError(
f"RUN/rxHostname has more host names ({num_hosts}) than expected. "
"Expected either 1 hostname for 50um or 2 host names for 25um."
)
def second_module_reversed(self):
"""Check if reverseSlaveReadOutMode is True."""
......
from cython cimport boundscheck, wraparound, cdivision
from cython.view cimport contiguous
from cython.parallel cimport prange
from cal_tools.enums import BadPixels
ctypedef unsigned short cell_t
ctypedef unsigned short raw_t
ctypedef float data_t
ctypedef unsigned char gain_t
ctypedef unsigned short cell_t
ctypedef unsigned int mask_t
cdef mask_t WRONG_GAIN_VALUE = BadPixels.WRONG_GAIN_VALUE, \
......@@ -17,7 +18,6 @@ cdef mask_t WRONG_GAIN_VALUE = BadPixels.WRONG_GAIN_VALUE, \
@boundscheck(False)
@wraparound(False)
@cdivision(True)
def correct_lpd_frames(
# (frame, x, y)
raw_t[:, :, ::contiguous] in_raw,
......@@ -29,10 +29,17 @@ def correct_lpd_frames(
mask_t[:, :, ::contiguous] out_mask,
# (cell, x, y, gain)
float[:, :, :, ::contiguous] ccv_offset,
float[:, :, :, ::contiguous] ccv_gain,
data_t[:, :, :, ::contiguous] ccv_offset,
data_t[:, :, :, ::contiguous] ccv_noise,
data_t[:, :, :, ::contiguous] ccv_gain,
mask_t[:, :, :, ::contiguous] ccv_mask,
# (frame, gain)
int[:, ::contiguous] parallel_gain_indices,
data_t threshold_sigma_high,
data_t threshold_sigma_mid,
int num_threads=1,
):
cdef int frame, ss, fs
......@@ -41,25 +48,34 @@ def correct_lpd_frames(
cdef gain_t gain
cdef mask_t mask
for frame in prange(in_raw.shape[0], nogil=True, num_threads=num_threads):
cdef bint adaptive_gain = parallel_gain_indices is None
for frame in prange(out_data.shape[0], nogil=True, num_threads=num_threads):
cell = in_cell[frame]
for ss in range(in_raw.shape[1]):
for fs in range(in_raw.shape[2]):
# Decode intensity and gain from raw data.
data = <data_t>(in_raw[frame, ss, fs] & 0xFFF)
gain = <gain_t>((in_raw[frame, ss, fs] & 0x3000) >> 12)
if adaptive_gain:
# Decode intensity and gain from raw data.
data = <data_t>(in_raw[frame, ss, fs] & 0xFFF)
gain = <gain_t>((in_raw[frame, ss, fs] & 0x3000) >> 12)
else:
_parallel_gain_thresholding(
in_raw, parallel_gain_indices, ccv_noise,
threshold_sigma_high, threshold_sigma_mid,
frame, cell, ss, fs,
&data, &gain)
if gain <= 2:
data = data - ccv_offset[cell, ss, fs, gain]
data = data * ccv_gain[cell, ss, fs, gain]
mask = ccv_mask[cell, ss, fs, gain]
else:
data = 0.0
gain = 0
mask = WRONG_GAIN_VALUE
data = data - ccv_offset[cell, ss, fs, gain]
data = data * ccv_gain[cell, ss, fs, gain]
if data > 1e7 or data < -1e7:
data = 0.0
mask = mask | VALUE_OUT_OF_RANGE
......@@ -67,3 +83,58 @@ def correct_lpd_frames(
out_data[frame, ss, fs] = data
out_gain[frame, ss, fs] = gain
out_mask[frame, ss, fs] = mask
@boundscheck(False)
@wraparound(False)
cdef inline void _parallel_gain_thresholding(
raw_t[:, :, ::contiguous] in_raw,
int[:, ::contiguous] parallel_gain_indices,
data_t[:, :, :, ::contiguous] ccv_noise,
data_t sigma_high, data_t sigma_mid,
int frame, int cell, int ss, int fs,
data_t* data_ptr, gain_t* gain_ptr
) noexcept nogil:
cdef int frame_high, frame_mid, frame_low
cdef data_t data_high, data_mid, data_low
cdef data_t threshold_high, threshold_mid
# Obtain indices to this pixel in each of three gain images.
frame_high = parallel_gain_indices[frame, 0]
frame_mid = parallel_gain_indices[frame, 1]
frame_low = parallel_gain_indices[frame, 2]
if (
((in_raw[frame_high, ss, fs] & 0x3000) >> 12) == 0 and
((in_raw[frame_mid, ss, fs] & 0x3000) >> 12) == 1 and
((in_raw[frame_low, ss, fs] & 0x3000) >> 12) == 2
):
# Verify that this pixel is recorded in the correct gain stage
# in each of the gain images. Memory cells in the transition
# regions between the gains can sometimes end up in the wrong
# one.
# Decode intensity in every gain stage.
data_high = <data_t>(in_raw[frame_high, ss, fs] & 0xFFF)
data_mid = <data_t>(in_raw[frame_mid, ss, fs] & 0xFFF)
data_low = <data_t>(in_raw[frame_low, ss, fs] & 0xFFF)
# Compute thresholds based on noise level.
threshold_high = 4096 - sigma_high * ccv_noise[cell, ss, fs, 0]
threshold_mid = 4096 - sigma_mid * ccv_noise[cell, ss, fs, 1]
# Pick the optimal gain stage for this pixel.
if data_mid > threshold_mid:
data_ptr[0] = data_low
gain_ptr[0] = 2
elif data_high > threshold_high:
data_ptr[0] = data_mid
gain_ptr[0] = 1
else:
data_ptr[0] = data_high
gain_ptr[0] = 0
else:
# Using an invalid gain stage triggers bad pixel masking later
# in the correction kernel.
gain_ptr[0] = 4
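For reference, the raw-word decoding done in the adaptive-gain branch of the kernel can be written in a few lines of Python (values are illustrative):

``` python
# The low 12 bits of an LPD raw word carry the intensity, bits 12-13 the
# gain stage (0 = high, 1 = medium, 2 = low).
raw = 0x2ABC                  # example 16-bit raw word
data = raw & 0xFFF            # -> 2748 ADU
gain = (raw & 0x3000) >> 12   # -> 2 (low gain)
print(data, gain)
```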
......@@ -99,20 +99,26 @@ def map_seq_files(
return mapped_files, total_files
def map_modules_from_folder(in_folder, run, path_template, karabo_da,
sequences=None):
def map_modules_from_folder(
in_folder, run, path_template, karabo_da,
sequences=None, key_name="qm"):
"""
Prepare queues of files to process.
Queues are stored in a dictionary keyed by module name (Q{}M{}) or by karabo_da name, depending on `key_name`.
:param in_folder: Input folder with raw data
:param run: Run number
:param path_template: Template for file name
e.g. `RAW-R{:04d}-{}-S{:05d}.h5`
:param karabo_da: List of data aggregators e.g. [AGIPD00, AGIPD01]
:param sequences: List of sequences to be considered
:return: Dictionary of queues of files, dictionary of module indexes,
total number of sequences, dictionary of number of sequences per module
Args:
in_folder: Input folder with raw data
run: Run number
path_template: Template for file name
e.g. `RAW-R{:04d}-{}-S{:05d}.h5`
karabo_da: List of data aggregators e.g. [AGIPD00, AGIPD01]
sequences: List of sequences to be considered
key_name: Either "qm" to key the returned dictionaries by Q{}M{}
    names, or "da" to key them by karabo_da names.
Returns:
Dictionary of queues of files, dictionary of module indexes,
total number of sequences, dictionary of number of sequences per module
"""
module_files = OrderedDict()
mod_ids = OrderedDict()
......@@ -121,7 +127,14 @@ def map_modules_from_folder(in_folder, run, path_template, karabo_da,
sequences_qm = {}
for inset in karabo_da:
module_idx = int(inset[-2:])
name = module_index_to_qm(module_idx)
if key_name == "qm":
name = module_index_to_qm(module_idx)
elif key_name == "da":
name = inset # karabo_da
else:
raise ValueError("key_name must be 'da' or 'qm'")
module_files[name] = Queue()
sequences_qm[name] = 0
mod_ids[name] = module_idx
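A hypothetical call using the new `key_name` option (paths and aggregator names are illustrative):

``` python
mapped, mod_ids, n_seq, seq_per_mod = map_modules_from_folder(
    "/gpfs/exfel/exp/FXE/202401/p005436/raw", 167,
    "RAW-R{:04d}-{}-S{:05d}.h5", ["LPD00", "LPD01"],
    key_name="da")  # dicts keyed by 'LPD00'/'LPD01' instead of Q1M1, ...
```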
......@@ -805,6 +818,8 @@ def get_constant_from_db_and_time(karabo_id: str, karabo_da: str,
return data, None
# TODO: Decide what is the best approach for all AGIPD modules
# and if we still need to keep this convention
def module_index_to_qm(index: int, total_modules: int = 16):
"""Maps module index (0-indexed) to quadrant + module string (1-indexed)"""
assert index < total_modules, f'{index} must be less than {total_modules}'
......
......@@ -75,7 +75,7 @@ def get_pycalib_version():
'git', f'--git-dir={git_dir}', 'describe', '--tag',
], stderr=DEVNULL).decode('utf8')
version = version.replace("\n", "")
except:
except Exception:
from .VERSION import __version__
version = __version__
return version
......