Compare revisions

Philipp Schmidt · Philipp Schmidt · Philipp Schmidt · Karim Ahmed · Karim Ahmed · Karim Ahmed
--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
 out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
 overwrite = False  # IGNORED, NEEDED FOR COMPATIBILITY.
 modules = [-1] # modules to correct, set to -1 for all, range allowed
 train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
 run = 225 # runs to process, required

 karabo_id = "MID_DET_AGIPD1M-1" # karabo_id of the detector
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 index_source_template = 'INDEX/{}/DET/{}:xtdf/'  # path in the HDF5 file to the INDEX section of the image source
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device

 slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook

 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_root = '/gpfs/exfel/d/cal/caldb_store'  # The calibration database root path to access constant files. For example accessing constants from the test database.

 mem_cells = -1  # Number of memory cells used, set to -1 to automatically infer
 bias_voltage = -1  # bias voltage, set to -1 to use stored value in slow data.
 acq_rate = -1. # the detector acquisition rate, use -1 to try to auto-determine
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
 max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
 mem_cells_db = -1  # set to a value different than -1 to use this value for DB queries, -1 to use mem_cells
 integration_time = -1 # integration time, set to -1 for auto-detection.

 # Correction parameters
 blc_noise_threshold = 5000 # above this mean signal intensity no baseline correction via noise is attempted
 cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels for a module to be considered dark enough to perform CM correction
 cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered a dark pixel
 cm_n_itr = 4 # number of iterations for common mode correction
 hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
 mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
 noisy_adc_threshold = 0.25 # threshold to mask complete adc
 ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
 photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 rel_gain = False # do relative gain correction based on PC data
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 zero_nans = False # set NaN values in corrected data to 0
 zero_orange = False # set to 0 very negative and very large values in corrected data
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
 corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
 force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
 force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
 mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
 common_mode = False # Common mode correction
 melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
 mask_zero_std = False # Mask pixels with zero standard deviation across train
 low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
 round_photons = False  # Round to absolute number of photons, only use with gain corrections

 # Optional auxiliary devices
 use_ppu_device = ''  # Device ID for a pulse picker device to only process picked trains, empty string to disable
 ppu_train_offset = 0  # When using the pulse picker, offset between the PPU's sequence start and actually picked train
+require_ppu_trigger = False  # Optional protection against running without PPU or without triggering trains.

 use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
 litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
 energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
 use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction

 use_xgm_device = ''  # DoocsXGM device ID to obtain actual photon energy, operating condition else.

 # Output parameters
 recast_image_data = ''  # Cast data to a different dtype before saving
 compress_fields = ['gain', 'mask']  # Datasets in image group to compress.

 # Plotting parameters
 skip_plots = False # exit after writing corrected files and metadata
 cell_id_preview = 1 # cell Id used for preview in single-shot plots

 # Parallelization parameters
 chunk_size = 1000  # Size of chunk for image-wise correction
 n_cores_correct = 16 # Number of chunks to be processed in parallel
 n_cores_files = 4 # Number of files to be processed in parallel
 sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
 max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
 max_tasks_per_worker = 1  # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.

 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    """Delegate sequence balancing to xfel_calibrate.

    Thin wrapper: all arguments are forwarded unchanged to
    ``xfel_calibrate.calibrate.balance_sequences`` (``max_nodes`` by keyword)
    and its result is returned as is. The import happens at call time.
    """
    from xfel_calibrate.calibrate import balance_sequences as _delegate
    forwarded = (in_folder, run, sequences, sequences_per_node, karabo_da)
    return _delegate(*forwarded, max_nodes=max_nodes)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import math
 import multiprocessing
 import os
 import warnings
 from datetime import timedelta
 from logging import warning
 from pathlib import Path

 import tabulate
 from dateutil import parser
 from IPython.display import Latex, Markdown, display

 warnings.filterwarnings('ignore')
 import h5py
 import matplotlib
 import matplotlib.pyplot as plt
 import yaml
 from extra_data import by_id, RunDirectory, stack_detector_data
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from matplotlib import cm as colormap
 from matplotlib.colors import LogNorm

 matplotlib.use("agg")
 %matplotlib inline
 import numpy as np
 import seaborn as sns

 sns.set()
 sns.set_context("paper", font_scale=1.4)
 sns.set_style("ticks")

 import cal_tools.restful_config as rest_cfg
 from cal_tools import agipdalgs as calgs
 from cal_tools.agipdlib import (
    AgipdCorrections,
    AgipdCtrl,
    CellRange,
    LitFrameSelection,
 )
 from cal_tools.ana_tools import get_range
 from cal_tools.calcat_interface import (
    AGIPD_CalibrationData,
    CalCatError,
 )
 from cal_tools.enums import AgipdGainMode, BadPixels
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
    map_modules_from_folder,
    module_index_to_qm,
    write_constants_fragment,
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'

 step_timer = StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Evaluated parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Fill dictionaries comprising bools and arguments for correction and data analysis

 # Here the hierarchy and dependability for correction booleans are defined
 corr_bools = {}

 # offset is at the bottom of AGIPD correction pyramid.
 corr_bools["only_offset"] = only_offset

 # Don't apply any other corrections if only_offset is requested
 if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["zero_nans"] = zero_nans
    corr_bools["zero_orange"] = zero_orange
    corr_bools["mask_noisy_adc"] = mask_noisy_adc
    corr_bools["force_hg_if_below"] = force_hg_if_below
    corr_bools["force_mg_if_below"] = force_mg_if_below
    corr_bools["common_mode"] = common_mode
    corr_bools["melt_snow"] = melt_snow
    corr_bools["mask_zero_std"] = mask_zero_std
    corr_bools["low_medium_gap"] = low_medium_gap
    corr_bools["round_photons"] = round_photons

 # Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
 disable_for_fixed_gain = [
    "adjust_mg_baseline",
    "blc_set_min",
    "force_hg_if_below",
    "force_mg_if_below",
    "low_medium_gap",
    "melt_snow",
    "rel_gain"
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 if sequences == [-1]:
    sequences = None

 dc = RunDirectory(run_folder)

 ctrl_src = ctrl_source_template.format(karabo_id_control)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)
 index_src = index_source_template.format(karabo_id, receiver_template)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create output folder
 out_folder.mkdir(parents=True, exist_ok=True)

 # Evaluate detector instance for mapping.
 # The instrument is the first "_"-separated token of karabo_id.
 instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8
 else:
    # Without this branch `dinstance` and `nmods` stay undefined and the
    # notebook fails further down with an unrelated-looking NameError.
    raise ValueError(
        f"Unknown instrument '{instrument}' derived from karabo_id "
        f"'{karabo_id}', expected one of SPB, MID or HED.")

 # Evaluate requested modules
 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:
    # Module index is taken from the last two characters of each aggregator name.
    modules = [int(x[-2:]) for x in karabo_da]

 print("Process modules:", ', '.join(module_index_to_qm(x) for x in modules))
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 print(f"Detector instance {dinstance}")
 ```

 %% Cell type:code id: tags:

 ``` python
-if use_ppu_device:
-    # Obtain trains to process if using a pulse picker device.
+if use_ppu_device and use_ppu_device in dc.control_sources:
+    # Obtain trains to process if using a pulse picker device and it's present.

-    # Will throw an uncaught exception if the device is wrong.
    seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()

    # The trains picked are the unique values of trainTrigger.sequenceStart
    # minus the first (previous trigger before this run).
    start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset

    train_ids = []
    for train_id in start_train_ids:
        n_trains = dc[
            use_ppu_device, 'trainTrigger.numberOfTrains'
        ].select_trains(by_id[[train_id]]).ndarray()[0]
        train_ids.extend(list(range(train_id, train_id + n_trains)))

-    print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
+    if train_ids:
+        print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
+    elif require_ppu_trigger:
+        raise RuntimeError(f'PPU device {use_ppu_device} not triggered but required, aborting!')
+    else:
+        print(f'PPU device {use_ppu_device} not triggered, processing all valid trains')
+        train_ids = None
+
+elif use_ppu_device:
+    # PPU configured but not present.
+
+    if require_ppu_trigger:
+        raise RuntimeError(f'PPU device {use_ppu_device} required but not found, aborting!')
+    else:
+        print(f'PPU device {use_ppu_device} configured but not found, processing all valid trains')
+        train_ids = None

 elif train_ids != [-1]:
    # Specific trains passed by parameter, convert to ndarray.
    train_ids = np.array(train_ids)

    print(f'Processing up to {len(train_ids)} manually selected train(s)')
-else:
-    # Process all trains.
-    train_ids = None

+else:
+    # No PPU configured.
    print(f'Processing all valid trains')
+    train_ids = None
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
 mapped_files, _, total_sequences, _, _ =  map_modules_from_folder(
    str(in_folder), run, path_template, karabo_da, sequences
 )
 file_list = []

 # ToDo: Split table over pages
 print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
 table = []
 ti = 0
 for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["#", "module", "# module", "file"])))
 file_list = sorted(file_list, key=lambda name: name[-10:])
 ```

 %% Cell type:code id: tags:

 ``` python
 first_mod_channel = sorted(modules)[0]

 # Pick the image source of the first requested module.
 # Match on "/<N>CH" instead of "<N>CH": the bare substring for module 1
 # would also match "11CH0" (and 0 would match "10CH0"). Sorting the
 # candidates makes the choice independent of the iteration order of
 # dc.all_sources. An IndexError is still raised if nothing matches.
 instrument_src_mod = sorted(
    s for s in dc.all_sources if f"/{first_mod_channel}CH" in s)[0]

 agipd_cond = AgipdCtrl(
    run_dc=dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without gain_setting value
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 # TODO: look for alternative for passing creation_time
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()
 if gain_mode == -1:
    gain_mode = agipd_cond.get_gain_mode()
 else:
    gain_mode = AgipdGainMode(gain_mode)
 ```

 %% Cell type:code id: tags:

 ``` python
 if mem_cells is None:
    raise ValueError(f"No raw images found for {instrument_src_mod}")

 mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db

 print(f"Maximum memory cells to calibrate: {mem_cells}")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Using {creation_time} as creation time")
 print("Operating conditions are:")
 print(f"• Bias voltage: {bias_voltage}")
 print(f"• Memory cells: {mem_cells_db}")
 print(f"• Acquisition rate: {acq_rate}")
 print(f"• Gain setting: {gain_setting}")
 print(f"• Gain mode: {gain_mode.name}")
 print(f"• Integration time: {integration_time}")
 print(f"• Photon Energy: 9.2")
 ```

 %% Cell type:code id: tags:

 ``` python
 if gain_mode:
    for to_disable in disable_for_fixed_gain:
        if corr_bools.get(to_disable, False):
            warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
            corr_bools[to_disable] = False
 ```

 %% Cell type:code id: tags:

 ``` python
 if use_litframe_finder != 'off':
    from extra_redu import make_litframe_finder, LitFrameFinderError

    # 'device' is the value advertised in the parameter description of
    # use_litframe_finder; it is accepted as an alias of 'online'.
    if use_litframe_finder not in ['auto', 'offline', 'online', 'device']:
        raise ValueError("Unexpected value in 'use_litframe_finder'.")

    # Instrument prefix is taken from the first three characters of the control karabo-id.
    inst = karabo_id_control[:3]
    litfrm = make_litframe_finder(inst, dc, litframe_device_id)
    try:
        # Map the requested mode onto the finder method providing the data.
        get_data = {
            'auto': litfrm.read_or_process,
            'offline': litfrm.process,
            'online': litfrm.read,
            'device': litfrm.read,
        }
        r = get_data[use_litframe_finder]()
        cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
        cell_sel.print_report()
    except LitFrameFinderError as err:
        # Fall back to plain range selection when no lit-frame data is usable.
        warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
        cell_sel = CellRange(max_pulses, max_cells=mem_cells)
 else:
    # Use range selection
    cell_sel = CellRange(max_pulses, max_cells=mem_cells)

 print(cell_sel.msg())
 ```

 %% Cell type:code id: tags:

 ``` python
 if round_photons and photon_energy <= 0.0:
    if use_xgm_device:
        # Try to obtain photon energy from XGM device.
        wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']

        try:
            from scipy.constants import h, c, e

            # Read wavelength as a single value and convert to hv.
            photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
            print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
        except ValueError:
            warning('XGM source available but photon energy varies greater than 1%, '
                 'photon rounding disabled!')
            round_photons = False
    else:
        warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
        round_photons = False
 elif round_photons:
    print(f'Photon energy for rounding: {photon_energy:.3f} keV')
 ```

 %% Cell type:code id: tags:

 ``` python
 agipd_corr = AgipdCorrections(
    mem_cells,
    cell_sel,
    h5_data_path=instrument_src,
    h5_index_path=index_src,
    corr_bools=corr_bools,
    gain_mode=gain_mode,
    comp_threads=os.cpu_count() // n_cores_files,
    train_ids=train_ids
 )

 agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
 agipd_corr.hg_hard_threshold = hg_hard_threshold
 agipd_corr.mg_hard_threshold = mg_hard_threshold

 agipd_corr.cm_dark_min = cm_dark_range[0]
 agipd_corr.cm_dark_max = cm_dark_range[1]
 agipd_corr.cm_dark_fraction = cm_dark_fraction
 agipd_corr.cm_n_itr = cm_n_itr
 agipd_corr.noisy_adc_threshold = noisy_adc_threshold
 agipd_corr.ff_gain = ff_gain
 agipd_corr.photon_energy = photon_energy

 agipd_corr.compress_fields = compress_fields
 if recast_image_data:
    agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving constants

 %% Cell type:code id: tags:

 ``` python
+def get_constants_and_update_metadata(cal_data, main_metadata, constants):
+    try:
+        metadata = cal_data.metadata(constants)
+        for key, value in metadata.items():
+            main_metadata.setdefault(key, {}).update(value)
+    except CalCatError as e:  # TODO: replace when API errors are improved.
+        warning(f"CalCatError: {e}")
+```
+
+%% Cell type:code id: tags:
+
+``` python
 step_timer.start()
 # Instantiate agipd_cal with the read operating conditions.
 agipd_cal = AGIPD_CalibrationData(
    detector_name=karabo_id,
    modules=karabo_da,
    sensor_bias_voltage=bias_voltage,
    memory_cells=mem_cells,
    acquisition_rate=acq_rate,
    integration_time=integration_time,
    source_energy=9.2,
    gain_mode=gain_mode,
    gain_setting=gain_setting,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
    caldb_root=Path(cal_db_root),
 )

 # Prepare lists of expected calibrations
 dark_constants = ["Offset", "Noise", "BadPixelsDark"]
 if not gain_mode:  # Adaptive gain
    dark_constants.append("ThresholdsDark")
-gain_constants = []
+
+agipd_metadata = agipd_cal.metadata(dark_constants)
+
+agipd_cal.gain_mode = None  # gain_mode is not used for gain constants
+pc_constants, ff_constants = [], []
 if any(agipd_corr.pc_bools):
-    gain_constants += ["SlopesPC", "BadPixelsPC"]
+    pc_constants = ["SlopesPC", "BadPixelsPC"]
+    get_constants_and_update_metadata(
+        agipd_cal, agipd_metadata, pc_constants)
+
 if agipd_corr.corr_bools.get('xray_corr'):
-    gain_constants += agipd_cal.illuminated_calibrations
+    ff_constants = list(agipd_cal.illuminated_calibrations)
+    get_constants_and_update_metadata(
+        agipd_cal, agipd_metadata, ff_constants)

-# First retrieve dark constants
-agipd_metadata = agipd_cal.metadata(dark_constants)
-if gain_constants:
-    # Then retrieve gain constants without
-    # using the `gain_mode` condition.
-    agipd_cal.gain_mode = None
-    try:
-        illum_metadata = agipd_cal.metadata(gain_constants)
-        for key, value in illum_metadata.items():
-            agipd_metadata.setdefault(key, {}).update(value)
-    except CalCatError as e:  # TODO: replace when API errors are improved.
-        warning(f"CalCatError: {e}")
 step_timer.done_step("Constants were retrieved in")

 print("Preparing constants ("
      f"FF: {agipd_corr.corr_bools.get('xray_corr', False)}, "
      f"PC: {any(agipd_corr.pc_bools)}, "
      f"BLC: {any(agipd_corr.blc_bools)})")
 # Display retrieved calibration constants timestamps
 agipd_cal.display_markdown_retrieved_constants(metadata=agipd_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate constants availability and exclude modules with no offsets.
 for da, calibrations in agipd_metadata.items():
    mod = modules[karabo_da.index(da)]
    # Constants to error out for when missing.
    error_missing_constants = {"Offset"}
    if not gain_mode:
        error_missing_constants |= {"ThresholdsDark"}

    error_missing_constants -= set(calibrations)
    if error_missing_constants:
        warning(f"Offset constant is not available to correct {da}.")
        # Remove module from files to process.
        del mapped_files[module_index_to_qm(mod)]
        karabo_da.drop(da)
        modules.drop(mod)

-    warn_missing_constants = set(dark_constants + gain_constants)
+    warn_missing_constants = set(dark_constants + pc_constants + ff_constants)
    warn_missing_constants -= error_missing_constants
    warn_missing_constants -= set(calibrations)
    if warn_missing_constants:
        warning(f"Constants {warn_missing_constants} were not retrieved for {da}.")

 if not mapped_files:  # Offsets are missing for all modules.
    raise Exception("Could not find offset constants for any modules, will not correct data.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=agipd_metadata,
    caldb_root=agipd_cal.caldb_root)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Load calibration constants to RAM
 agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))

 def load_constants(da, module):
    """
    Initialize constants data from previously retrieved metadata.

    Reads every constant listed in ``agipd_metadata[da]`` from its file
    below ``agipd_cal.caldb_root`` and hands the data, together with the
    per-constant variant, to ``agipd_corr.init_constants``.

    Args:
        da (str): Data Aggregator (Karabo DA)
        module (int): Module index

    Returns:
        None
    """
    const_data = dict()
    variant = dict()
    for cname, mdata in agipd_metadata[da].items():
        dataset = mdata["dataset"]
        with h5py.File(agipd_cal.caldb_root / mdata["path"], "r") as cf:  # noqa
            # Copy so the array stays usable after the file is closed.
            const_data[cname] = np.copy(cf[f"{dataset}/data"])
            # Default to variant 0 when the attribute is absent, also if the
            # dataset carries other attributes but no "variant".
            variant[cname] = cf[dataset].attrs.get("variant", 0)
    agipd_corr.init_constants(const_data, module, variant)


 step_timer.start()
 with multiprocessing.Pool(processes=len(modules)) as pool:
    pool.starmap(load_constants, zip(karabo_da, modules))
 step_timer.done_step(f'Constants were loaded in ')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Store timestamps for Offset, SlopesPC, and SlopesFF
 # in YAML file for time-summary table.
 timestamps = {}

 for mod, mod_mdata in agipd_metadata.items():
    modno = int(mod[-2:])

    module_timestamps = {}

    # Store few time stamps if exists
    # Add NA to keep array structure
    for key in ['Offset', 'SlopesPC', 'SlopesFF']:
        if key in mod_mdata:
            module_timestamps[key] = mod_mdata[key]["begin_validity_at"]
        else:
            module_timestamps[key] = "NA"

    timestamps[module_index_to_qm(modno)] = module_timestamps

 seq = sequences[0] if sequences else 0

 with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
    yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
 ```

 %% Cell type:markdown id: tags:

 ## Data processing ##

 %% Cell type:code id: tags:

 ``` python
 # allocate memory for images and hists
 n_images_max = mem_cells * 256
 data_shape = (n_images_max, 512, 128)
 agipd_corr.allocate_images(data_shape, n_cores_files)
 ```

 %% Cell type:code id: tags:

 ``` python
 def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements

    Generator yielding consecutive slices of ``l``; the last slice may be
    shorter. Nothing is yielded for an empty ``l``.

    Args:
        l: Sliceable sequence with a length.
        batch_size (int): Maximum number of elements per batch, must be >= 1.

    Raises:
        ValueError: If batch_size is smaller than 1 (raised on first
            iteration); such a value would otherwise never terminate.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    for start in range(0, len(l), batch_size):
        yield l[start:start + batch_size]
 ```

 %% Cell type:code id: tags:

 ``` python
 def imagewise_chunks(img_counts):
    """Split the images of every file slot into roughly equal chunks

    The number of chunks per slot is ceil(n_img / chunk_size), using the
    notebook-level ``chunk_size``; slots without images produce nothing.

    Yields (file data slot, start index, stop index)
    """
    for slot, total in enumerate(img_counts):
        parts = math.ceil(total / chunk_size)
        yield from (
            (slot, k * total // parts, (k + 1) * total // parts)
            for k in range(parts)
        )
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 # -1 means "never recycle workers", which multiprocessing expresses as None.
 if max_tasks_per_worker == -1:
    max_tasks_per_worker = None
 with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
    step_timer.done_step('Started pool')

    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files")
        step_timer.start()
        # One image count per file of the batch, in batch order.
        img_counts = pool.starmap(
            agipd_corr.read_file,
            zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
        )
        step_timer.done_step(f'Loading data from files')

        # img_counts is a list, so a comparison against the integer 0 can
        # never be true; check whether any file holds images instead.
        if not any(img_counts):
            # Skip any further processing and output if there are no images to
            # correct in this batch of files.
            continue

        if mask_zero_std:
            # Evaluate zero-data-std mask
            pool.starmap(
                agipd_corr.mask_zero_std, itertools.product(
                    range(len(file_batch)),
                    np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
                )
            )
            step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # Common mode correction is enabled.
            # Cell selection is only activated after common mode correction.
            # Perform cross-file correction parallel over asics
            image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                image_files_idx, range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

            # From here on the counts refer to the selected images only.
            img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
            step_timer.done_step("Applying selected cells after common mode correction")

        # Perform image-wise correction
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Gain corrections")

        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Correction of {len(file_list)} files is finished")
 print(f"Total processing time {step_timer.timespan():.01f} s")
 print(f"Timing summary per batch of {n_cores_files} files:")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_3d_plot(data, edges, x_axis, y_axis):
    """Draw a 2D histogram as a 3D surface on a new figure.

    :param data: 2D array of counts, plotted transposed as surface height
    :param edges: Pair of bin edge arrays; the last edge of each is dropped
    :param x_axis: Label of the x axis
    :param y_axis: Label of the y axis
    """
    fig = plt.figure(figsize=(10, 10))
    # Figure.gca() no longer accepts keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6); create the 3D axes explicitly.
    ax = fig.add_subplot(111, projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T

    # Plot the surface.
    ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")


 def do_2d_plot(data, edges, y_axis, x_axis):
    """Show a 2D histogram as a log-scaled image on a new figure.

    Note the argument order: the y label comes before the x label.

    :param data: 2D array of counts, displayed with its first axis reversed
    :param edges: Pair of bin edge arrays; edges[1] spans x, edges[0] spans y
    :param y_axis: Label of the y axis
    :param x_axis: Label of the x axis
    """
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    x_edges, y_edges = edges[1], edges[0]
    # Colour scale starts at 1 count and spans at least up to 10.
    colour_norm = LogNorm(vmin=1, vmax=max(10, np.max(data)))
    image = ax.imshow(
        data[::-1, :],
        extent=[np.min(x_edges), np.max(x_edges), np.min(y_edges), np.max(y_edges)],
        aspect="auto",
        norm=colour_norm,
    )

    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    fig.colorbar(image).set_label("Counts")
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
    """Load a single train for all modules and stack it

    If the run cannot be opened (FileNotFoundError) a warning is logged and
    the process exits with status 0.

    :param data_folder: Path to folder with data
    :param source: Data source (key) to be loaded
    :param include: Inset of file name to be considered
    :param detector_id: The karabo id of the detector to get data for
    :param tid: Train Id to be loaded. If None, the first train for which
        all selected sources have data is used
    :param modules: Forwarded to stack_detector_data as `modules`
    :param fillvalue: Forwarded to stack_detector_data as `fillvalue`
    :return: Tuple of (train Id, stacked detector data)
    """
    try:
        run_data = RunDirectory(data_folder, include)
    except FileNotFoundError:
        import sys
        warning(f'No corrected files for {include}. Skipping plots.')
        sys.exit(0)

    selection = run_data.select(f'{detector_id}/DET/*', source)
    if tid is None:
        # A first full trainId for all available modules is of interest.
        train_iter = selection.trains(require_all=True, keep_dims=True)
        tid, data = next(train_iter)
    else:
        tid, data = selection.train_from_id(tid, keep_dims=True)

    return tid, stack_detector_data(
        train=data, data=source, fillvalue=fillvalue, modules=modules)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Geometry used by the detector previews below: the 500K layout for the
 # AGIPD500K instance, otherwise a 1M layout built from fixed quadrant
 # positions.
 if dinstance == "AGIPD500K":
    geom = AGIPD_500K2GGeometry.from_origin()
 else:
    geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
        (-525, 625),
        (-550, -10),
        (520, -160),
        (542.5, 475),
    ])
 ```

 %% Cell type:code id: tags:

 ``` python
 # Preview data comes from one sequence file only: sequence 0 when all
 # sequences were processed, otherwise the first requested sequence.
 include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
 # The train ID is chosen from the corrected image data and then reused for
 # every other key so that all arrays describe the same train.
 tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)

 _, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
 _, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
 _, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
 _, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
 _, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
 # The raw counterpart of the same train is read from the input run folder.
 _, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
 ```

 %% Cell type:code id: tags:

 ``` python
 # The length of the first axis of `gains` is reported as the image count.
 display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Analogue Gain ###

 %% Cell type:code id: tags:

 ``` python
 # Histogram the two raw image components against each other; index 0 is
 # labelled as signal and index 1 as analogue gain in the plots below.
 hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
                                         raw[:,1,...].flatten().astype(np.float32),
                                         bins=(100, 100),
                                         range=[[4000, 8192], [4000, 8192]])
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Digitized Gain ###

 The following plot shows signal vs. digitized gain.

 %% Cell type:code id: tags:

 ``` python
 # Corrected signal against the gain value, using three bins over [0, 3]
 # on the gain axis.
 hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
                                         gains.flatten().astype(np.float32), bins=(100, 3),
                                         range=[[-50, 8192], [0, 3]])
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Percentage of entries in `gains` equal to 0, 1 and 2, shown under the
 # headers High, Medium and Low respectively.
 print("Gain statistics in %")
 table = [[
    f'{np.count_nonzero(gains == 0) / gains.size * 100:.02f}',
    f'{np.count_nonzero(gains == 1) / gains.size * 100:.03f}',
    f'{np.count_nonzero(gains == 2) / gains.size * 100:.03f}',
 ]]
 md = display(Latex(tabulate.tabulate(
    table,
    headers=["High", "Medium", "Low"],
    tablefmt='latex',
 )))
 ```

 %% Cell type:markdown id: tags:

 ### Intensity per Pulse ###

 %% Cell type:code id: tags:

 ``` python
 # Range of the non-negative pulse IDs present in the loaded train.
 pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]

 # Modify pulse_range, if only one pulse is selected.
 if pulse_range[0] == pulse_range[1]:
    pulse_range = [0, pulse_range[1]+int(acq_rate)]

 # Mean corrected signal per image, averaged over axes 2 and 3 and
 # ignoring NaN values.
 mean_data = np.nanmean(corrected, axis=(2, 3))
 # First pass: signal axis limited to [-50, 1000].
 hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                      pulseId.flatten().astype(np.float32),
                                      bins=(100, int(pulse_range[1])),
                                      range=[[-50, 1000], pulse_range])

 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")

 # Second pass: same data with the signal axis widened to [-50, 200000].
 hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                      pulseId.flatten().astype(np.float32),
                                      bins=(100,  int(pulse_range[1])),
                                      range=[[-50, 200000], pulse_range])

 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 ```

 %% Cell type:markdown id: tags:

 ### Baseline shift ###

 Estimated baseline shift with respect to the total ADU counts of the corrected image.

 %% Cell type:code id: tags:

 ``` python
 # Distribution of the stored baseline shift values on a logarithmic
 # count axis.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 h = ax.hist(blshift.flatten(), bins=100, log=True)
 _ = plt.xlabel('Baseline shift [ADU]')
 _ = plt.ylabel('Counts')
 _ = ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Baseline shift against the summed corrected signal.
 fig = plt.figure(figsize=(10, 10))
 # Despite its name this is a NaN-ignoring sum over axes 2 and 3, not an
 # average; it is scaled by 1e6 for the x axis.
 corrected_ave = np.nansum(corrected, axis=(2, 3))
 plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
 plt.xlim(-1, 1000)
 plt.grid()
 plt.xlabel('Illuminated corrected [MADU] ')
 _ = plt.ylabel('Estimated baseline shift [ADU]')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Translate the requested preview cell ID into an image index within the
 # loaded train, falling back to the first available image if the cell ID
 # is absent. Only column 0 of `cellId` is consulted.
 if cell_id_preview not in cellId[:, 0]:
    print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
    cell_id_preview = cellId[:, 0][0]
    cell_idx_preview = 0
    print(f"Previewing the first available cellId: {cell_id_preview}.")
 else:
    # First image whose cell ID matches the requested one.
    cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Raw preview ###\n'))
 # A mean image is only shown when the train holds more than one image.
 if cellId.shape[0] != 1:
    display(Markdown(f'Mean over images of the RAW data\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    # Average component 0 of the raw data over the images picked by the
    # cell selection range.
    data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
    vmin, vmax = get_range(data, 5)
    ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 else:
    print("Skipping mean RAW preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Component 0 of the raw image at the previewed index, with colour limits
 # taken from get_range.
 display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
 ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Corrected preview ###\n'))
 # A mean image is only shown when the train holds more than one image.
 if cellId.shape[0] != 1:
    display(Markdown('### Mean CORRECTED Preview ###\n'))
    display(Markdown(f'A mean across train: {tid}\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(corrected, axis=0)
    # Only the upper limit from get_range is used; the lower colour limit
    # is fixed at -50.
    vmin, vmax = get_range(data, 7)
    ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
 else:
    print("Skipping mean CORRECTED preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
 # The lower colour limit is pinned to -50 regardless of get_range's result.
 vmin = - 50
 ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Histogram of the corrected single-shot image of the previewed cell, with
 # roughly 2 ADU wide bins between -50 ADU and the upper plotting limit.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
 # `np.int` was deprecated in NumPy 1.20 and removed in 1.24; it was only an
 # alias of the builtin `int`, which is used instead.
 nbins = int((vmax + 50) / 2)
 h = ax.hist(corrected[cell_idx_preview].flatten(),
            bins=nbins, range=(-50, vmax),
            histtype='stepfilled', log=True)
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Histogram of all corrected values of the train, overlaid with one
 # histogram per gain value (0, 1, 2).
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected, 10, -100)
 # The upper limit is replaced by the NaN-ignoring maximum of the data,
 # capped at 50000.
 vmax = np.nanmax(corrected)
 if vmax > 50000:
    vmax=50000
 # `np.int` was deprecated in NumPy 1.20 and removed in 1.24; it was only an
 # alias of the builtin `int`, which is used instead.
 nbins = int((vmax + 100) / 5)
 h = ax.hist(corrected.flatten(), bins=nbins,
            range=(-100, vmax), histtype='step', log=True, label = 'All')
 ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='High gain', color='green')
 ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Medium gain', color='red')
 ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Low gain', color='yellow')
 ax.legend()
 ax.grid()
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Section header for the maximum-gain image plotted in the next cell.
 display(Markdown('### Maximum GAIN Preview ###\n'))
 display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Per-pixel maximum of the gain value over the first axis of `gains`.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
                         cmap="jet", vmin=-1, vmax=3)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 # One row per BadPixels member: its name and its value rendered as a
 # binary string zero-padded to at least 16 digits.
 table = []
 for item in BadPixels:
    bit_mask = format(item.value, "016b")
    table.append((item.name, bit_mask))
 md = display(Latex(tabulate.tabulate(
    table,
    headers=["Bad pixel type", "Bit mask"],
    tablefmt='latex',
 )))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Section header for the single-shot bad pixel map plotted in the next cell.
 display(Markdown(f'### Single Shot Bad Pixels ### \n'))
 display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Plot log2 of the mask value of the previewed image on a 0..32 colour
 # scale. Pixels with mask value 0 evaluate to -inf under log2.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Only shown when photon rounding was applied: compare the histograms
 # collected by the correction object before and after rounding.
 if round_photons:
    display(Markdown('### Photonization histograms ###'))

    # Bin centres computed from the stored bin edges.
    x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
    x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
    # Integer photon numbers covering the post-rounding axis.
    x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)

    fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
    ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
    ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
    ax.set_yscale('log')
    # NOTE(review): a lower limit of 0 is requested on a logarithmic axis —
    # confirm this yields the intended range.
    ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
    ax.set_xlim(x_postround[0], x_postround[-1]+1)
    ax.set_xlabel('Photon energy / keV')
    ax.set_ylabel('Intensity')
    # Dashed vertical lines at integer multiples of the photon energy.
    ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')

    # Secondary x axis showing the same range in units of photons.
    phx = ax.twiny()
    phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
    phx.set_xticks(x_photons)
    phx.set_xlabel('# Photons')
    pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train  ###

 %% Cell type:code id: tags:

 ``` python
 # Per pixel, the fraction (0..1) of images in the train with a non-zero
 # mask value.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train. Only Dark Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 # Work on a copy so that `mask` itself stays untouched.
 cm = np.copy(mask)
 # Entries whose numeric value exceeds NO_DARK_DATA are zeroed; note that
 # this compares the whole integer value, not individual bits.
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
                         vmin=0, ax=ax, vmax=1, cmap="jet")
 ```

 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
 out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
 overwrite = False  # IGNORED, NEEDED FOR COMPATIBILITY.
 modules = [-1] # modules to correct, set to -1 for all, range allowed
 train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
 run = 225 # runs to process, required

 karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 index_source_template = 'INDEX/{}/DET/{}:xtdf/'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device

 slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook

 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_root = '/gpfs/exfel/d/cal/caldb_store'  # The calibration database root path to access constant files. For example accessing constants from the test database.

 mem_cells = -1  # Number of memory cells used, set to -1 to automatically infer
 bias_voltage = -1  # bias voltage, set to -1 to use stored value in slow data.
 acq_rate = -1. # the detector acquisition rate, use -1 to try to auto-determine
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
 max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
 mem_cells_db = -1  # set to a value different than -1 to use this value for DB queries
 integration_time = -1 # integration time, set to -1 for auto-detection.

 # Correction parameters
 blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
 cm_dark_fraction = 0.66 # threshold for fraction of  empty pixels to consider module enough dark to perform CM correction
 cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
 cm_n_itr = 4 # number of iterations for common mode correction
 hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
 mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
 noisy_adc_threshold = 0.25 # threshold to mask complete adc
 ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
 photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 rel_gain = False # do relative gain correction based on PC data
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 zero_nans = False # set NaN values in corrected data to 0
 zero_orange = False # set to 0 very negative and very large values in corrected data
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
 corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
 force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
 force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
 mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
 common_mode = False # Common mode correction
 melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
 mask_zero_std = False # Mask pixels with zero standard deviation across train
 low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
 round_photons = False  # Round to absolute number of photons, only use with gain corrections

 # Optional auxiliary devices
 use_ppu_device = ''  # Device ID for a pulse picker device to only process picked trains, empty string to disable
 ppu_train_offset = 0  # When using the pulse picker, offset between the PPU's sequence start and actually picked train
+require_ppu_trigger = False  # Optional protection against running without PPU or without triggering trains.

 use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
 litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
 energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
 use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction

 use_xgm_device = ''  # DoocsXGM device ID to obtain actual photon energy, operating condition else.

 # Output parameters
 recast_image_data = ''  # Cast data to a different dtype before saving
 compress_fields = ['gain', 'mask']  # Datasets in image group to compress.

 # Plotting parameters
 skip_plots = False # exit after writing corrected files and metadata
 cell_id_preview = 1 # cell Id used for preview in single-shot plots

 # Parallelization parameters
 chunk_size = 1000  # Size of chunk for image-wise correction
 n_cores_correct = 16 # Number of chunks to be processed in parallel
 n_cores_files = 4 # Number of files to be processed in parallel
 sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
 max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
 max_tasks_per_worker = 1  # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.

 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    """Forward to `xfel_calibrate.calibrate.balance_sequences`.

    All arguments are passed through unchanged; `max_nodes` is handed over
    as a keyword argument. The import is done inside the function.
    """
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import math
 import multiprocessing
 import os
 import warnings
 from datetime import timedelta
 from logging import warning
 from pathlib import Path

 import tabulate
 from dateutil import parser
 from IPython.display import Latex, Markdown, display

 warnings.filterwarnings('ignore')
 import h5py
 import matplotlib
 import matplotlib.pyplot as plt
 import yaml
 from extra_data import by_id, RunDirectory, stack_detector_data
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from matplotlib import cm as colormap
 from matplotlib.colors import LogNorm

 matplotlib.use("agg")
 %matplotlib inline
 import numpy as np
 import seaborn as sns

 sns.set()
 sns.set_context("paper", font_scale=1.4)
 sns.set_style("ticks")

 import cal_tools.restful_config as rest_cfg
 from cal_tools import agipdalgs as calgs
 from cal_tools.agipdlib import (
    AgipdCorrections,
    AgipdCtrl,
    CellRange,
    LitFrameSelection,
 )
 from cal_tools.ana_tools import get_range
 from cal_tools.calcat_interface import (
    AGIPD_CalibrationData,
    CalCatError,
 )
 from cal_tools.enums import AgipdGainMode, BadPixels
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
    map_modules_from_folder,
    module_index_to_qm,
    write_constants_fragment,
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Turn the folder parameters into Path objects and derive the raw run
 # folder ("r" followed by the zero-padded, four-digit run number).
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'

 # Timer used for the timing summary of the notebook steps.
 step_timer = StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Evaluated parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Collect the switches that steer the correction and data analysis steps.
 # The offset correction is the base of the AGIPD correction pyramid, so its
 # flag is always recorded.
 corr_bools = {"only_offset": only_offset}

 # Every other correction is only registered when more than the plain
 # offset correction is requested.
 if not only_offset:
    corr_bools.update({
        "adjust_mg_baseline": adjust_mg_baseline,
        "rel_gain": rel_gain,
        "xray_corr": xray_gain,
        "blc_noise": blc_noise,
        "blc_stripes": blc_stripes,
        "blc_hmatch": blc_hmatch,
        "blc_set_min": blc_set_min,
        "match_asics": match_asics,
        "corr_asic_diag": corr_asic_diag,
        "zero_nans": zero_nans,
        "zero_orange": zero_orange,
        "mask_noisy_adc": mask_noisy_adc,
        "force_hg_if_below": force_hg_if_below,
        "force_mg_if_below": force_mg_if_below,
        "common_mode": common_mode,
        "melt_snow": melt_snow,
        "mask_zero_std": mask_zero_std,
        "low_medium_gap": low_medium_gap,
        "round_photons": round_photons,
    })

 # Corrections that have no meaning in fixed gain mode; they are switched
 # off explicitly later on once such a mode is detected.
 disable_for_fixed_gain = [
    "adjust_mg_baseline", "blc_set_min",
    "force_hg_if_below", "force_mg_if_below",
    "low_medium_gap", "melt_snow", "rel_gain",
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 # [-1] is the parameter placeholder for "all sequences"; it is represented
 # as None from here on.
 if sequences == [-1]:
    sequences = None

 # Open the raw run.
 dc = RunDirectory(run_folder)

 # Fill the karabo IDs into the source templates. The receiver template
 # itself still holds a placeholder after this step.
 ctrl_src = ctrl_source_template.format(karabo_id_control)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)
 index_src = index_source_template.format(karabo_id, receiver_template)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create output folder
 out_folder.mkdir(parents=True, exist_ok=True)

 # Evaluate detector instance for mapping: the instrument is the part of
 # the karabo ID in front of the first underscore.
 instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8
 else:
    # Without this branch `dinstance` and `nmods` stay undefined and the
    # cell fails further down with an unrelated NameError.
    raise ValueError(
        f"Unsupported instrument {instrument!r} derived from karabo_id "
        f"{karabo_id!r}, expected one of: SPB, MID, HED")

 # Evaluate requested modules: either derive the data aggregator names from
 # the module indices, or the module indices from the last two characters
 # of the given aggregator names.
 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:
    modules = [int(x[-2:]) for x in karabo_da]

 print("Process modules:", ', '.join(module_index_to_qm(x) for x in modules))
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 print(f"Detector instance {dinstance}")
 ```

 %% Cell type:code id: tags:

 ``` python
-if use_ppu_device:
-    # Obtain trains to process if using a pulse picker device.
+if use_ppu_device and use_ppu_device in dc.control_sources:
+    # Obtain trains to process if using a pulse picker device and it's present.

-    # Will throw an uncaught exception if the device is wrong.
    seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()

    # The trains picked are the unique values of trainTrigger.sequenceStart
    # minus the first (previous trigger before this run).
    start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset

    train_ids = []
    for train_id in start_train_ids:
        n_trains = dc[
            use_ppu_device, 'trainTrigger.numberOfTrains'
        ].select_trains(by_id[[train_id]]).ndarray()[0]
        train_ids.extend(list(range(train_id, train_id + n_trains)))

-    print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
+    if train_ids:
+        print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
+    elif require_ppu_trigger:
+        raise RuntimeError(f'PPU device {use_ppu_device} not triggered but required, aborting!')
+    else:
+        print(f'PPU device {use_ppu_device} not triggered, processing all valid trains')
+        train_ids = None
+
+elif use_ppu_device:
+    # PPU configured but not present.
+
+    if require_ppu_trigger:
+        raise RuntimeError(f'PPU device {use_ppu_device} required but not found, aborting!')
+    else:
+        print(f'PPU device {use_ppu_device} configured but not found, processing all valid trains')
+        train_ids = None

 elif train_ids != [-1]:
    # Specific trains passed by parameter, convert to ndarray.
    train_ids = np.array(train_ids)

    print(f'Processing up to {len(train_ids)} manually selected train(s)')
-else:
-    # Process all trains.
-    train_ids = None

+else:
+    # No PPU configured.
    print(f'Processing all valid trains')
+    train_ids = None
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
 mapped_files, _, total_sequences, _, _ =  map_modules_from_folder(
    str(in_folder), run, path_template, karabo_da, sequences
 )
 file_list = []

 # ToDo: Split table over pages
 print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
 table = []
 ti = 0
 for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["#", "module", "# module", "file"])))
 file_list = sorted(file_list, key=lambda name: name[-10:])
 ```

 %% Cell type:code id: tags:

 ``` python
 # The lowest requested module index is used to find an image source for
 # reading the detector conditions.
 first_mod_channel = sorted(modules)[0]

 # NOTE(review): this is a substring test, so e.g. "1CH" would also match
 # a source of module 11 — confirm that any matching source is acceptable.
 instrument_src_mod = [
    s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]

 agipd_cond = AgipdCtrl(
    run_dc=dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without gain_setting value
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 # TODO: look for alternative for passing creation_time
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()
 if gain_mode == -1:
    gain_mode = agipd_cond.get_gain_mode()
 else:
    gain_mode = AgipdGainMode(gain_mode)
 ```

 %% Cell type:code id: tags:

 ``` python
 # A missing memory cell count is treated as "no raw images available".
 if mem_cells is None:
    raise ValueError(f"No raw images found for {instrument_src_mod}")

 # Unless overridden by parameter, the database is queried with the same
 # number of memory cells as used for the correction.
 mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db

 print(f"Maximum memory cells to calibrate: {mem_cells}")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Summary of the operating conditions determined above.
 print(f"Using {creation_time} as creation time")
 print("Operating conditions are:")
 print(f"• Bias voltage: {bias_voltage}")
 print(f"• Memory cells: {mem_cells_db}")
 print(f"• Acquisition rate: {acq_rate}")
 print(f"• Gain setting: {gain_setting}")
 print(f"• Gain mode: {gain_mode.name}")
 print(f"• Integration time: {integration_time}")
 # The value printed here is a fixed literal, not the `photon_energy`
 # parameter.
 print(f"• Photon Energy: 9.2")
 ```

 %% Cell type:code id: tags:

 ``` python
 # A truthy gain mode is handled as fixed gain (the parameter documents 0
 # as adaptive): every requested correction listed in
 # `disable_for_fixed_gain` is switched off with a warning.
 if gain_mode:
    for to_disable in disable_for_fixed_gain:
        if corr_bools.get(to_disable, False):
            warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
            corr_bools[to_disable] = False
 ```

 %% Cell type:code id: tags:

 ``` python
 # Choose which frames of a train are processed: either the lit frames
 # reported by a lit frame finder or a plain range of memory cells.
 if use_litframe_finder != 'off':
    from extra_redu import make_litframe_finder, LitFrameFinderError

    # The parameter documentation names the online source 'device' while
    # this cell only accepted 'online'; treat 'device' as an alias so the
    # documented value works.
    if use_litframe_finder == 'device':
        use_litframe_finder = 'online'

    if use_litframe_finder not in ['auto', 'offline', 'online']:
        raise ValueError("Unexpected value in 'use_litframe_finder'.")

    # The first three characters of the control karabo ID name the instrument.
    inst = karabo_id_control[:3]
    litfrm = make_litframe_finder(inst, dc, litframe_device_id)
    try:
        get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
        r = get_data[use_litframe_finder]()
        cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
        cell_sel.print_report()
    except LitFrameFinderError as err:
        # Best effort: fall back to the plain range selection.
        warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
        cell_sel = CellRange(max_pulses, max_cells=mem_cells)
 else:
    # Use range selection
    cell_sel = CellRange(max_pulses, max_cells=mem_cells)

 print(cell_sel.msg())
 ```

 %% Cell type:code id: tags:

 ``` python
 # Photon rounding needs a photon energy. A positive `photon_energy`
 # parameter is used as is; otherwise the XGM device is tried, and rounding
 # is switched off if no energy can be obtained.
 if round_photons and photon_energy <= 0.0:
    if use_xgm_device:
        # Try to obtain photon energy from XGM device.
        wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']

        try:
            from scipy.constants import h, c, e

            # Read wavelength as a single value and convert to hv.
            # NOTE(review): the 1e-6 factor presumably assumes a wavelength
            # in nm so that the result is in keV — confirm.
            photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
            print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
        except ValueError:
            warning('XGM source available but photon energy varies greater than 1%, '
                 'photon rounding disabled!')
            round_photons = False
    else:
        warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
        round_photons = False
 elif round_photons:
    print(f'Photon energy for rounding: {photon_energy:.3f} keV')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create the correction object; the available CPUs are divided by the
 # number of files processed in parallel to get the thread count.
 agipd_corr = AgipdCorrections(
    mem_cells,
    cell_sel,
    h5_data_path=instrument_src,
    h5_index_path=index_src,
    corr_bools=corr_bools,
    gain_mode=gain_mode,
    comp_threads=os.cpu_count() // n_cores_files,
    train_ids=train_ids
 )

 # Note the sign flip: the parameter is handed over negated.
 agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
 agipd_corr.hg_hard_threshold = hg_hard_threshold
 agipd_corr.mg_hard_threshold = mg_hard_threshold

 # Common mode settings; `cm_dark_range` holds the lower and upper bound.
 agipd_corr.cm_dark_min = cm_dark_range[0]
 agipd_corr.cm_dark_max = cm_dark_range[1]
 agipd_corr.cm_dark_fraction = cm_dark_fraction
 agipd_corr.cm_n_itr = cm_n_itr
 agipd_corr.noisy_adc_threshold = noisy_adc_threshold
 agipd_corr.ff_gain = ff_gain
 agipd_corr.photon_energy = photon_energy

 # Output options; the image data is only recast when a dtype was given.
 agipd_corr.compress_fields = compress_fields
 if recast_image_data:
    agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving constants

 %% Cell type:code id: tags:

 ``` python
+def get_constants_and_update_metadata(cal_data, main_metadata, constants):
+    try:
+        metadata = cal_data.metadata(constants)
+        for key, value in metadata.items():
+            main_metadata.setdefault(key, {}).update(value)
+    except CalCatError as e:  # TODO: replace when API errors are improved.
+        warning(f"CalCatError: {e}")
+```
+
+%% Cell type:code id: tags:
+
+``` python
 step_timer.start()
 # Instantiate agipd_cal with the read operating conditions.
 agipd_cal = AGIPD_CalibrationData(
    detector_name=karabo_id,
    modules=karabo_da,
    sensor_bias_voltage=bias_voltage,
    memory_cells=mem_cells,
    acquisition_rate=acq_rate,
    integration_time=integration_time,
    source_energy=9.2,
    gain_mode=gain_mode,
    gain_setting=gain_setting,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
    caldb_root=Path(cal_db_root),
 )

 # Prepare lists of expected calibrations
 dark_constants = ["Offset", "Noise", "BadPixelsDark"]
 if not gain_mode:  # Adaptive gain
    dark_constants.append("ThresholdsDark")
-gain_constants = []
+
+agipd_metadata = agipd_cal.metadata(dark_constants)
+
+agipd_cal.gain_mode = None  # gain_mode is not used for gain constants
+pc_constants, ff_constants = [], []
 if any(agipd_corr.pc_bools):
-    gain_constants += ["SlopesPC", "BadPixelsPC"]
+    pc_constants = ["SlopesPC", "BadPixelsPC"]
+    get_constants_and_update_metadata(
+        agipd_cal, agipd_metadata, pc_constants)
+
 if agipd_corr.corr_bools.get('xray_corr'):
-    gain_constants += agipd_cal.illuminated_calibrations
+    ff_constants = list(agipd_cal.illuminated_calibrations)
+    get_constants_and_update_metadata(
+        agipd_cal, agipd_metadata, ff_constants)

-# First retrieve dark constants
-agipd_metadata = agipd_cal.metadata(dark_constants)
-if gain_constants:
-    # Then retrieve gain constants without
-    # using the `gain_mode` condition.
-    agipd_cal.gain_mode = None
-    try:
-        illum_metadata = agipd_cal.metadata(gain_constants)
-        for key, value in illum_metadata.items():
-            agipd_metadata.setdefault(key, {}).update(value)
-    except CalCatError as e:  # TODO: replace when API errors are improved.
-        warning(f"CalCatError: {e}")
 step_timer.done_step("Constants were retrieved in")

 print("Preparing constants ("
      f"FF: {agipd_corr.corr_bools.get('xray_corr', False)}, "
      f"PC: {any(agipd_corr.pc_bools)}, "
      f"BLC: {any(agipd_corr.blc_bools)})")
 # Display retrieved calibration constants timestamps
 agipd_cal.display_markdown_retrieved_constants(metadata=agipd_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate constants availability and exclude modules with no offsets.
 for da, calibrations in agipd_metadata.items():
    mod = modules[karabo_da.index(da)]
    # Constants to error out for when missing.
    error_missing_constants = {"Offset"}
    if not gain_mode:
        error_missing_constants |= {"ThresholdsDark"}

    error_missing_constants -= set(calibrations)
    if error_missing_constants:
        warning(f"Offset constant is not available to correct {da}.")
        # Remove module from files to process.
        del mapped_files[module_index_to_qm(mod)]
        karabo_da.drop(da)
        modules.drop(mod)

-    warn_missing_constants = set(dark_constants + gain_constants)
+    warn_missing_constants = set(dark_constants + pc_constants + ff_constants)
    warn_missing_constants -= error_missing_constants
    warn_missing_constants -= set(calibrations)
    if warn_missing_constants:
        warning(f"Constants {warn_missing_constants} were not retrieved for {da}.")

 if not mapped_files:  # Offsets are missing for all modules.
    raise Exception("Could not find offset constants for any modules, will not correct data.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=agipd_metadata,
    caldb_root=agipd_cal.caldb_root)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Load calibration constants to RAM
 agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))

 def load_constants(da, module):
    """
    Read the constants of one module from file and pass them to agipd_corr.

    For every constant listed in `agipd_metadata[da]` the dataset
    `<dataset>/data` is copied from the HDF5 file found under
    `agipd_cal.caldb_root`, together with the dataset's "variant"
    attribute (0 if the dataset has no attributes at all). The collected
    data is handed to `agipd_corr.init_constants`.

    Args:
        da (str): Data Aggregator (Karabo DA)
        module (int): Module index

    Returns:
        None
    """
    const_data = {}
    variant = {}
    for cname, mdata in agipd_metadata[da].items():
        dataset = mdata["dataset"]
        const_file = agipd_cal.caldb_root / mdata["path"]
        with h5py.File(const_file, "r") as cf:
            const_data[cname] = np.copy(cf[f"{dataset}/data"])
            attrs = cf[dataset].attrs
            if attrs.keys():
                variant[cname] = attrs["variant"]
            else:
                variant[cname] = 0
    agipd_corr.init_constants(const_data, module, variant)


 step_timer.start()
 with multiprocessing.Pool(processes=len(modules)) as pool:
    pool.starmap(load_constants, zip(karabo_da, modules))
 step_timer.done_step(f'Constants were loaded in ')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Store timestamps for Offset, SlopesPC, and SlopesFF
 # in YAML file for time-summary table.
 timestamps = {}

 for mod, mod_mdata in agipd_metadata.items():
    modno = int(mod[-2:])

    module_timestamps = {}

    # Store few time stamps if exists
    # Add NA to keep array structure
    for key in ['Offset', 'SlopesPC', 'SlopesFF']:
        if key in mod_mdata:
            module_timestamps[key] = mod_mdata[key]["begin_validity_at"]
        else:
            module_timestamps[key] = "NA"

    timestamps[module_index_to_qm(modno)] = module_timestamps

 seq = sequences[0] if sequences else 0

 with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
    yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
 ```

 %% Cell type:markdown id: tags:

 ## Data processing ##

 %% Cell type:code id: tags:

 ``` python
 # allocate memory for images and hists
 n_images_max = mem_cells * 256
 data_shape = (n_images_max, 512, 128)
 agipd_corr.allocate_images(data_shape, n_cores_files)
 ```

 %% Cell type:code id: tags:

 ``` python
 def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements.

    Yields consecutive slices of `l`; the last slice may be shorter.
    An empty `l` yields nothing.

    Raises ValueError (when iteration starts) if batch_size is smaller
    than 1, which would otherwise never advance through the list.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    for start in range(0, len(l), batch_size):
        yield l[start:start + batch_size]
 ```

 %% Cell type:code id: tags:

 ``` python
 def imagewise_chunks(img_counts):
    """Split the images of each loaded file into roughly equal chunks.

    The number of chunks per file is ceil(n_img / chunk_size), where
    `chunk_size` is read from the enclosing notebook scope. Files with
    zero images produce no chunks.

    Yields (file data slot, start index, stop index)
    """
    for slot, count in enumerate(img_counts):
        parts = math.ceil(count / chunk_size)
        for part in range(parts):
            first = part * count // parts
            last = (part + 1) * count // parts
            yield slot, first, last
 ```

 %% Cell type:code id: tags:

 ``` python
 # Correct the files batch by batch. One worker pool is shared by all
 # correction steps; each step is timed with `step_timer`.
 step_timer.start()
 if max_tasks_per_worker == -1:
    max_tasks_per_worker = None
 with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
    step_timer.done_step('Started pool')

    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files")
        step_timer.start()
        # One image count per file of the batch.
        img_counts = pool.starmap(
            agipd_corr.read_file,
            zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
        )
        step_timer.done_step(f'Loading data from files')

        # `img_counts` is a list, so it must not be compared to 0 directly.
        if not any(img_counts):
            # Skip any further processing and output if there are no images to
            # correct in this batch of files.
            continue

        if mask_zero_std:
            # Evaluate zero-data-std mask
            pool.starmap(
                agipd_corr.mask_zero_std, itertools.product(
                    range(len(file_batch)),
                    np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
                )
            )
            step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # Common mode correction is enabled.
            # Cell selection is only activated after common mode correction.
            # Perform cross-file correction parallel over asics
            image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                image_files_idx, range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

            img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
            step_timer.done_step("Applying selected cells after common mode correction")

        # Perform image-wise correction
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Gain corrections")

        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Correction of {len(file_list)} files is finished")
 print(f"Total processing time {step_timer.timespan():.01f} s")
 print(f"Timing summary per batch of {n_cores_files} files:")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_3d_plot(data, edges, x_axis, y_axis):
    """Draw a 2D histogram as a 3D surface on a new 10x10 inch figure.

    :param data: 2D histogram counts; plotted transposed as surface height
    :param edges: Pair of edge arrays; the last entry of each is dropped
        to obtain the X and Y surface coordinates
    :param x_axis: Label of the x axis
    :param y_axis: Label of the y axis
    """
    fig = plt.figure(figsize=(10, 10))
    # Figure.gca() does not accept keyword arguments in recent Matplotlib
    # releases, so the 3D axes is created explicitly.
    ax = fig.add_subplot(111, projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T

    # Plot the surface.
    ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")


 def do_2d_plot(data, edges, y_axis, x_axis):
    """Show a 2D histogram as a log-scaled image on a new 10x10 inch figure.

    :param data: 2D histogram counts; rows are flipped before display
    :param edges: Pair of edge arrays; edges[0] spans the vertical extent
        and edges[1] the horizontal extent of the image
    :param y_axis: Label of the y axis
    :param x_axis: Label of the x axis
    """
    fig, ax = plt.subplots(figsize=(10, 10))

    vertical, horizontal = edges[0], edges[1]
    extent = [np.min(horizontal), np.max(horizontal),
              np.min(vertical), np.max(vertical)]
    # The upper colour limit never drops below 10 counts.
    color_norm = LogNorm(vmin=1, vmax=max(10, np.max(data)))

    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto", norm=color_norm)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)

    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
    """Load a single train of one data key, stacked over the modules.

    If the run cannot be opened (FileNotFoundError) a warning is issued
    and the interpreter exits with status 0, skipping all later plots.

    :param data_folder: Path to folder with data
    :param source: Data key to be loaded; also selected on all
        `{detector_id}/DET/*` sources
    :param include: Inset of file name to be considered
    :param detector_id: The karabo id of the detector to get data for
    :param tid: Train Id to be loaded. If None is given, the first train
        returned when requiring all selected sources is used
    :param modules: Passed on to stack_detector_data as `modules`
    :param fillvalue: Passed on to stack_detector_data as `fillvalue`
    :return: Tuple of (train Id, stacked data)
    """
    try:
        run_data = RunDirectory(data_folder, include)
    except FileNotFoundError:
        warning(f'No corrected files for {include}. Skipping plots.')
        import sys
        sys.exit(0)

    selection = run_data.select(f'{detector_id}/DET/*', source)
    if tid is None:
        # A first full trainId for all available modules is of interest.
        tid, data = next(selection.trains(require_all=True, keep_dims=True))
    else:
        tid, data = selection.train_from_id(tid, keep_dims=True)

    return tid, stack_detector_data(
        train=data, data=source, fillvalue=fillvalue, modules=modules)
 ```

 %% Cell type:code id: tags:

 ``` python
 if dinstance == "AGIPD500K":
    geom = AGIPD_500K2GGeometry.from_origin()
 else:
    geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
        (-525, 625),
        (-550, -10),
        (520, -160),
        (542.5, 475),
    ])
 ```

 %% Cell type:code id: tags:

 ``` python
 include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
 tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)

 _, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
 _, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
 _, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
 _, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
 _, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
 _, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Analogue Gain ###

 %% Cell type:code id: tags:

 ``` python
 hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
                                         raw[:,1,...].flatten().astype(np.float32),
                                         bins=(100, 100),
                                         range=[[4000, 8192], [4000, 8192]])
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Digitized Gain ###

 The following plot shows signal vs. digitized gain.

 %% Cell type:code id: tags:

 ``` python
 hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
                                         gains.flatten().astype(np.float32), bins=(100, 3),
                                         range=[[-50, 8192], [0, 3]])
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Gain statistics in %")
 table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
          f'{gains[gains==1].size/gains.size*100:.03f}',
          f'{gains[gains==2].size/gains.size*100:.03f}']]
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["High", "Medium", "Low"])))
 ```

 %% Cell type:markdown id: tags:

 ### Intensity per Pulse ###

 %% Cell type:code id: tags:

 ``` python
 pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]

 # Modify pulse_range, if only one pulse is selected.
 if pulse_range[0] == pulse_range[1]:
    pulse_range = [0, pulse_range[1]+int(acq_rate)]

 mean_data = np.nanmean(corrected, axis=(2, 3))
 hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                      pulseId.flatten().astype(np.float32),
                                      bins=(100, int(pulse_range[1])),
                                      range=[[-50, 1000], pulse_range])

 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")

 hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                      pulseId.flatten().astype(np.float32),
                                      bins=(100,  int(pulse_range[1])),
                                      range=[[-50, 200000], pulse_range])

 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 ```

 %% Cell type:markdown id: tags:

 ### Baseline shift ###

 Estimated base-line shift with respect to the total ADU counts of the corrected image.

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 h = ax.hist(blshift.flatten(), bins=100, log=True)
 _ = plt.xlabel('Baseline shift [ADU]')
 _ = plt.ylabel('Counts')
 _ = ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(10, 10))
 corrected_ave = np.nansum(corrected, axis=(2, 3))
 plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
 plt.xlim(-1, 1000)
 plt.grid()
 plt.xlabel('Illuminated corrected [MADU] ')
 _ = plt.ylabel('Estimated baseline shift [ADU]')
 ```

 %% Cell type:code id: tags:

 ``` python
 if cell_id_preview not in cellId[:, 0]:
    print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
    cell_id_preview = cellId[:, 0][0]
    cell_idx_preview = 0
    print(f"Previewing the first available cellId: {cell_id_preview}.")
 else:
    cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Raw preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown(f'Mean over images of the RAW data\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
    vmin, vmax = get_range(data, 5)
    ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 else:
    print("Skipping mean RAW preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
 ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Corrected preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown('### Mean CORRECTED Preview ###\n'))
    display(Markdown(f'A mean across train: {tid}\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(corrected, axis=0)
    vmin, vmax = get_range(data, 7)
    ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
 else:
    print("Skipping mean CORRECTED preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
 vmin = - 50
 ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Histogram of the corrected signal for the previewed cell.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
 # np.int was removed from NumPy; it was an alias of the builtin int.
 nbins = int((vmax + 50) / 2)
 h = ax.hist(corrected[cell_idx_preview].flatten(),
            bins=nbins, range=(-50, vmax),
            histtype='stepfilled', log=True)
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Histogram of the corrected signal of the whole train, overall and
 # split by the gain value (0, 1, 2) stored in `gains`.
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected, 10, -100)
 vmax = np.nanmax(corrected)
 # Cap the histogram range at 50000.
 if vmax > 50000:
    vmax=50000
 # np.int was removed from NumPy; it was an alias of the builtin int.
 nbins = int((vmax + 100) / 5)
 h = ax.hist(corrected.flatten(), bins=nbins,
            range=(-100, vmax), histtype='step', log=True, label = 'All')
 ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='High gain', color='green')
 ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Medium gain', color='red')
 ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Low gain', color='yellow')
 ax.legend()
 ax.grid()
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Maximum GAIN Preview ###\n'))
 display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
                         cmap="jet", vmin=-1, vmax=3)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'### Single Shot Bad Pixels ### \n'))
 display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
 ```

 %% Cell type:code id: tags:

 ``` python
 if round_photons:
    display(Markdown('### Photonization histograms ###'))

    x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
    x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
    x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)

    fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
    ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
    ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
    ax.set_yscale('log')
    ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
    ax.set_xlim(x_postround[0], x_postround[-1]+1)
    ax.set_xlabel('Photon energy / keV')
    ax.set_ylabel('Intensity')
    ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')

    phx = ax.twiny()
    phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
    phx.set_xticks(x_photons)
    phx.set_xlabel('# Photons')
    pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train  ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train. Only Dark Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 cm = np.copy(mask)
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
                         vmin=0, ax=ax, vmax=1, cmap="jet")
 ```

--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
 %% Cell type:markdown id: tags:

 # AGIPD Characterize Dark Images #

 Author: European XFEL Detector Group, Version: 2.0

 The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, recorded in separate runs.

 The evaluated calibration constants are stored locally and injected into the calibration database.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
 out_folder = "" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 modules = [-1]  # list of modules to evaluate, RANGE ALLOWED
 run_high = 9985 # run number in which high gain data was recorded, required
 run_med = 9984 # run number in which medium gain data was recorded, required
 run_low = 9983 # run number in which low gain data was recorded, required
 operation_mode = "ADAPTIVE_GAIN"  # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")

 karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '

 use_dir_creation_date = True  # use dir creation date as data production reference date
 cal_db_interface = "tcp://max-exfl-cal001:8020" # the database interface to use
 cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
+sort_runs = True  # Sort the selected dark runs. This flag is added for old data (e.g. 900174 r0011).

 mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode, use -1 to use value stored in slow data.
 integration_time = -1 # integration time, negative values for auto-detection.
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017

 thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
-thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backcompatability. Default [0, 0] to take the following parameters.
+thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_offset_hard_hg = [3000, 7000]  # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_mg = [6000, 10000]  # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_lg = [6000, 10000]  # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_hg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_hg, but for fixed gain operation
 thresholds_offset_hard_mg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_mg, but for fixed gain operation
 thresholds_offset_hard_lg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_lg, but for fixed gain operation

 thresholds_noise_sigma = 5. # noise sigma thresholds for offset deduced bad pixels
-thresholds_noise_hard = [0, 0] # For setting the same threshold noise for the 3 gains. Left for backcompatability. Default [0, 0] to take the following parameters.
+thresholds_noise_hard = [0, 0] # For setting the same threshold noise for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_gain_sigma = 5.  # Gain separation sigma threshold
-max_trains = 550  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarely avoid memory issues.
-min_trains = 1  # Miniumum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.
+max_trains = 550  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarily avoid memory issues.
+min_trains = 1  # Minimum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells

 # This is used if modules is not specified:
 def find_modules(in_folder, run_high, modules):
    """Return the module numbers to process.

    An explicit selection (anything other than None or [-1]) is returned
    unchanged. Otherwise the file names in the high gain run folder
    `<in_folder>/r<run_high:04d>` are searched for `-AGIPDnn-` and the
    sorted, unique module numbers found are returned.
    """
    import re
    from pathlib import Path

    if modules is not None and modules != [-1]:
        return modules

    pattern = re.compile(r'-AGIPD(\d{2})-')
    run_dir = Path(in_folder, f'r{run_high:04d}')
    hits = (pattern.search(entry.name) for entry in run_dir.iterdir())
    return sorted({int(hit.group(1)) for hit in hits if hit})
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import multiprocessing
 import os
 from collections import OrderedDict
 from datetime import timedelta
 from pathlib import Path
-from typing import List, Tuple
+from typing import Tuple

-import dateutil.parser
 import matplotlib
 import numpy as np
 import pasha as psh
 import tabulate
 import yaml
 from IPython.display import Latex, Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')

 import iCalibrationDB
 import matplotlib.pyplot as plt
-from cal_tools.agipdlib import AgipdCtrl
-from cal_tools.enums import AgipdGainMode, BadPixels
+from cal_tools.agipdlib import AgipdCtrlRuns
+from cal_tools.enums import BadPixels
 from cal_tools.plotting import (
    create_constant_overview,
    plot_badpix_3d,
    show_overview,
    show_processed_modules,
 )
 from cal_tools.tools import (
    get_dir_creation_date,
    get_from_db,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
-    map_gain_stages,
    module_index_to_qm,
    run_prop_seq_from_path,
    save_const_to_h5,
    send_to_db,
 )

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # insert control device if format string (does nothing otherwise)
 ctrl_src = ctrl_source_template.format(karabo_id_control)

-runs_dict = OrderedDict()
 run_numbers = [run_high, run_med, run_low]

-for gain_idx, (run_name, run_number) in enumerate(zip(["high", "med", "low"], run_numbers)):
-    runs_dict[run_name] = {
-        "number": run_number,
-        "gain": gain_idx,
-        "dc": RunDirectory(f'{in_folder}/r{run_number:04d}/')
-    }
-
 creation_time=None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)

 print(f"Using {creation_time} as creation time of constant.")

 run, prop, seq = run_prop_seq_from_path(in_folder)

 # Read report path and create file location tuple to add with the injection
 file_loc = f"proposal:{prop} runs:{run_low} {run_med} {run_high}"

 report = get_report(metadata_folder)
 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 instrument = karabo_id.split("_")[0]

 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8

 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 def create_karabo_da_list(modules):
    """Return the data aggregator names ("AGIPDnn") for the module numbers."""
    return [f"AGIPD{index:02d}" for index in modules]

 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = create_karabo_da_list(modules)
 else:
    modules = [int(x[-2:]) for x in karabo_da]

 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 print(f"Detector instance {dinstance}")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create out_folder if it doesn't exist.
 Path(out_folder).mkdir(parents=True, exist_ok=True)

 mod_image_size = []
-for run_dict in runs_dict.values():
+for run in run_numbers:
    missing_modules = []  # modules with no images within a run.
    n_trains_list = []   # list of the number of trains for each module within a run.
    # This is important in case of no slurm parallelization over modules is done.
    # (e.g. running notebook interactively)
    for m in modules:
        # validate that there are trains for the selected modules and run.
-        dc = run_dict["dc"].select(
+        dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(
            instrument_src.format(m), "*", require_all=True)
        n_trains = len(dc.train_ids)

        if n_trains == 0:
-            print(f"WARNING: No images for module AGIPD{m:02d}, run {run_dict['number']}.")
+            print(f"WARNING: No images for module AGIPD{m:02d}, run {run}.")
            missing_modules.append(m)
        # Raise a warning if the module has less trains than expected.
        elif n_trains < min_trains:
-            print(f"WARNING: AGIPD{m:02d}, run {run_dict['number']} "
+            print(f"WARNING: AGIPD{m:02d}, run {run} "
                  f"has trains less than minimum trains: {min_trains}.")
        else:
            print(f"Processing {max_trains if max_trains < n_trains else n_trains} "
-                  f"for AGIPD{m:02d}, run {run_dict['number']} ")
+                  f"for AGIPD{m:02d}, run {run} ")

        n_trains_list.append(n_trains)
        mod_image_size.append(np.product(dc[instrument_src.format(m), "image.data"].shape) * 2  / 1e9)

    if max(n_trains_list) == 0:
-        raise ValueError(f"No images to process for run: {run_dict['number']}")
+        raise ValueError(f"No images to process for run: {run}")
    elif max(n_trains_list) < min_trains:
-        raise ValueError(f"{run_dict['number']} has less than minimum trains: {min_trains}")
+        raise ValueError(f"{run} has less than minimum trains: {min_trains}")

 # Update modules and karabo_da lists based on available modules to processes.
 modules = [m for m in modules if m not in missing_modules]
 karabo_da = create_karabo_da_list(modules)

 print(f"Will process data of ({sum(mod_image_size):.02f} GB).")
 ```

 %% Cell type:markdown id: tags:

 ## Read and validate the runs control data.

 %% Cell type:code id: tags:

 ``` python
-def read_run_conditions(runs_dict: dict):
-    agipd_cond = AgipdCtrl(
-        run_dc=runs_dict["dc"],
-        image_src=instrument_src_mod,
-        ctrl_src=ctrl_src,
-    )
-    cond_dict["runs"].append(runs_dict["number"])
-    if acq_rate == 0:
-        cond_dict["acq_rate"].append(agipd_cond.get_acq_rate())
-    if mem_cells == 0:
-        cond_dict["mem_cells"].append(agipd_cond.get_num_cells())
-    if gain_setting == -1:
-        cond_dict["gain_setting"].append(
-            agipd_cond.get_gain_setting(creation_time))
-    if bias_voltage == 0.:
-        cond_dict["bias_voltage"].append(
-            agipd_cond.get_bias_voltage(karabo_id_control))
-    if integration_time == -1:
-        cond_dict["integration_time"].append(
-            agipd_cond.get_integration_time())
-    if gain_mode == -1:
-        cond_dict["gain_mode"].append(agipd_cond.get_gain_mode())
-    else:
-        cond_dict["gain_mode"].append(AgipdGainMode(gain_mode))
-```
-
-%% Cell type:code id: tags:
-
-``` python
-def validate_gain_modes(gain_modes: List[AgipdGainMode]):
-    # Validate that gain modes are not a mix of adaptive and fixed gain.
-    if all(
-        gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
-    ):
-        fixed_gain_mode = False
-    # Some runs are adaptive by mistake.
-    elif any(
-        gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
-    ):
-        raise ValueError(
-            f"ERROR: Given runs {run_numbers}"
-            " have a mix of ADAPTIVE and FIXED gain modes: "
-            f"{gain_modes}."
-    )
-    elif list(gain_modes) == [
-        AgipdGainMode.FIXED_HIGH_GAIN,
-        AgipdGainMode.FIXED_MEDIUM_GAIN,
-        AgipdGainMode.FIXED_LOW_GAIN
-    ]:
-        fixed_gain_mode = True
-    else:
-        raise ValueError(
-        "ERROR: Wrong arrangment of given dark runs. "
-        f"Given runs' gain_modes are {gain_modes} for runs: {run_numbers}."
-    )
-    return fixed_gain_mode
-```
-
-%% Cell type:code id: tags:
-
-``` python
 # Read slow data from 1st channel only.
 # Read all modules in one notebook and validate the conditions across detectors?
 # Currently slurm jobs run per one module.

 # TODO: what if first module is not available. Maybe only channel 2 available
 instrument_src_mod = instrument_src.format(modules[0])

-cond_dict = dict()
-fixed_gain_mode = None
+agipd_ctrl_dark = AgipdCtrlRuns(
+    raw_folder=in_folder,
+    runs=run_numbers,
+    image_src=instrument_src_mod,
+    ctrl_src=ctrl_src,
+    sort_dark_runs_enabled=sort_runs
+)
+# Update run_numbers list in case it was sorted.
+run_numbers = agipd_ctrl_dark.runs
+if mem_cells == 0:
+    mem_cells = agipd_ctrl_dark.get_memory_cells()
+
+if acq_rate == 0:
+    acq_rate = agipd_ctrl_dark.get_acq_rate()
+
+if bias_voltage == 0:
+    bias_voltage = agipd_ctrl_dark.get_bias_voltage(karabo_id_control)
+
+fixed_gain_mode = False
+if gain_mode == -1:
+    gain_mode = agipd_ctrl_dark.gain_modes
+    fixed_gain_mode = agipd_ctrl_dark.fixed_gain_mode()
+
+if gain_setting == -1:
+    gain_setting = agipd_ctrl_dark.get_gain_setting()

-with multiprocessing.Manager() as manager:
-    cond_dict["runs"] = manager.list()
-    cond_dict["acq_rate"] = manager.list()
-    cond_dict["mem_cells"] = manager.list()
-    cond_dict["gain_setting"] = manager.list()
-    cond_dict["gain_mode"] = manager.list()
-    cond_dict["bias_voltage"] = manager.list()
-    cond_dict["integration_time"] = manager.list()
-
-    with multiprocessing.Pool(processes=len(modules)) as pool:
-        pool.starmap(read_run_conditions, zip(runs_dict.values()))
-
-    for cond, vlist in cond_dict.items():
-        if cond == "runs":
-            continue
-        elif cond == "gain_mode":
-            fixed_gain_mode = validate_gain_modes(cond_dict["gain_mode"])
-        elif not all(x == vlist[0] for x in vlist):
-            # TODO: raise ERROR??
-            print(
-                f"WARNING: {cond} is not the same for the runs "
-                f"{cond_dict['runs']} with values"
-                f" of {cond_dict[cond]}, respectively."
-            )
-    if cond_dict["acq_rate"]: acq_rate = cond_dict["acq_rate"][0]
-    if cond_dict["mem_cells"]: mem_cells = cond_dict["mem_cells"][0]
-    if cond_dict["gain_setting"]: gain_setting = cond_dict["gain_setting"][0]
-    if cond_dict["gain_mode"]: gain_mode = list(cond_dict["gain_mode"])
-    if cond_dict["bias_voltage"]: bias_voltage = cond_dict["bias_voltage"][0]
-    if cond_dict["integration_time"]: integration_time = cond_dict["integration_time"][0]
+if integration_time == -1:
+    integration_time = agipd_ctrl_dark.get_integration_time()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Determine the gain operation mode based on the gain_mode stored in control h5file.
 # The `operation_mode` parameter is only cross-checked here; on a mismatch a
 # warning is printed and processing follows `fixed_gain_mode`.
 if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
    print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")

 if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
    # Adjacent string literals are concatenated verbatim, so the separating
    # space after "but" has to be part of the first literal.
    print(
        "WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but "
        "slow data indicates FIXED_GAIN. Processing fixed gain constants.")
 elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
    print(
        "WARNING: Operation_mode parameter is FIXED_GAIN, "
        "slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Parameters are:")
 print(f"Proposal: {prop}")
 print(f"Acquisition rate: {acq_rate}")
 print(f"Memory cells: {mem_cells}")
 print(f"Runs: {run_numbers}")
 print(f"Interlaced mode: {interlaced}")
 print(f"Using DB: {db_output}")
 print(f"Input: {in_folder}")
 print(f"Output: {out_folder}")
 print(f"Bias voltage: {bias_voltage}V")
 print(f"Gain setting: {gain_setting}")
 print(f"Integration time: {integration_time}")
 print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
 ```

 %% Cell type:code id: tags:

 ``` python
 if thresholds_offset_hard != [0, 0]:
    # if set, this will override the individual parameters
    thresholds_offset_hard = [thresholds_offset_hard] * 3
 elif fixed_gain_mode:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg_fixed,
        thresholds_offset_hard_mg_fixed,
        thresholds_offset_hard_lg_fixed,
    ]
 else:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg,
        thresholds_offset_hard_mg,
        thresholds_offset_hard_lg,
    ]
 print("Will use the following hard offset thresholds")
 for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
    print(f"- {name} gain: {value}")

 if thresholds_noise_hard != [0, 0]:
    thresholds_noise_hard = [thresholds_noise_hard] * 3
 else:
    thresholds_noise_hard = [
        thresholds_noise_hard_hg,
        thresholds_noise_hard_mg,
        thresholds_noise_hard_lg,
    ]
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 # One process per (module, gain run) pair, capped at 6 processes.
 parallel_num_procs = min(6, len(modules)*3)
 # Share the available CPUs between the processes, but always keep at least
 # one thread per process: the integer division yields 0 on machines with
 # fewer CPUs than processes.
 parallel_num_threads = max(1, multiprocessing.cpu_count() // parallel_num_procs)
 print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")

 def characterize_module(
-    channel: int, runs_dict: dict,
+    channel: int, gain_run: Tuple[int, int],
 ) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:

+    gain_index, run = gain_run
    # Select the corresponding module channel.
    instrument_src_mod = instrument_src.format(channel)

-    run_dc = runs_dict["dc"].select(instrument_src_mod, require_all=True)
+    run_dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(instrument_src_mod, require_all=True)
    if max_trains != 0:
        run_dc = run_dc.select_trains(np.s_[:max_trains])
-    gain_index = runs_dict["gain"]

    # Read module's image and cellId data.
    im = run_dc[instrument_src_mod, "image.data"].ndarray()
    cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())

    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]

    if interlaced:
        if not fixed_gain_mode:
            ga = im[1::2, 0, ...]
        im = im[0::2, 0, ...].astype(np.float32)
        cell_ids = cell_ids[::2]
    else:
        if not fixed_gain_mode:
            ga = im[:, 1, ...]
        im = im[:, 0, ...].astype(np.float32)
    im = np.transpose(im)
    if not fixed_gain_mode:
        ga = np.transpose(ga)

    context = psh.context.ThreadContext(num_workers=parallel_num_threads)
    offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
    noise = context.alloc(like=offset)

    if fixed_gain_mode:
        gains = None
        gains_std = None
    else:
        gains = context.alloc(like=offset)
        gains_std = context.alloc(like=offset)

    def process_cell(worker_id, array_index, cell_number):
        cell_slice_index = (cell_ids == cell_number)
        im_slice = im[..., cell_slice_index]
        offset[..., cell_number] = np.median(im_slice, axis=2)
        noise[..., cell_number] = np.std(im_slice, axis=2)
        if not fixed_gain_mode:
            ga_slice = ga[..., cell_slice_index]
            gains[..., cell_number] = np.median(ga_slice, axis=2)
            gains_std[..., cell_number] = np.std(ga_slice, axis=2)
    unique_cell_ids = np.unique(cell_ids)

    # We assume cells are accepted starting 0.
    if np.any(unique_cell_ids > mem_cells):
        raise ValueError(
            f"Invalid cells found {unique_cell_ids} "
            f"for run: {run_dc.run_metadata()['runNumber']}.")

    context.map(process_cell, unique_cell_ids)

    # bad pixels
    bp = np.zeros_like(offset, dtype=np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[(offset < local_thresholds_offset_hard[0]) |
       (offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))
    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    return channel, gain_index, offset, noise, gains, gains_std, bp
 ```

 %% Cell type:code id: tags:

 ``` python
 with multiprocessing.Pool(processes=parallel_num_procs) as pool:
    results = pool.starmap(
-        characterize_module, itertools.product(modules, list(runs_dict.values())))
+        characterize_module, itertools.product(modules, list(enumerate(run_numbers))))

 # mapped values for processing 2 modules example:
-# [
-#     0, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
-#     0, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
-#     0, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
-#     1, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
-#     1, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
-#     1, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
+# [
+#     (0, (0, run-high)),
+#     (0, (1, run-med)),
+#     (0, (2, run-low)),
+#     (1, (0, run-high)),
+#     (1, (1, run-med)),
+#     (1, (2, run-low)),
 # ]
 ```

 %% Cell type:code id: tags:

 ``` python
 offset_g = OrderedDict()
 noise_g = OrderedDict()
 badpix_g = OrderedDict()
 if not fixed_gain_mode:
    gain_g = OrderedDict()
    gainstd_g = OrderedDict()


 for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
    qm = module_index_to_qm(module_index)
    if qm not in offset_g:
        offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
        noise_g[qm] = np.zeros_like(offset_g[qm])
        badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
        if not fixed_gain_mode:
            gain_g[qm] = np.zeros_like(offset_g[qm])
            gainstd_g[qm] = np.zeros_like(offset_g[qm])

    offset_g[qm][..., gain_index] = offset
    noise_g[qm][..., gain_index] = noise
    badpix_g[qm][..., gain_index] = bp
    if not fixed_gain_mode:
        gain_g[qm][..., gain_index] = gains
        gainstd_g[qm][..., gain_index] = gains_std
 ```

 %% Cell type:code id: tags:

 ``` python
 # Add bad pixels due to bad gain separation
 if not fixed_gain_mode:
    for qm in gain_g.keys():
        for g in range(2):
            # Bad pixels during bad gain separation.
            # Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
            bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
                np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
            badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
                BadPixels.GAIN_THRESHOLDING_ERROR
 ```

 %% Cell type:markdown id: tags:

 The thresholds for gain switching are then defined as the mean value between the individual gain bit levels. Note that these thresholds need to be refined with charge induced thresholds, as the two are not the same.

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    thresholds_g = {}
    for qm in gain_g.keys():
        thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
        thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
        thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
        for i in range(3):
            thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in modules:
    qm = module_index_to_qm(i)
    res[qm] = {
        'Offset': offset_g[qm],
        'Noise': noise_g[qm],
        'BadPixelsDark': badpix_g[qm]
    }
    if not fixed_gain_mode:
        res[qm]['ThresholdsDark'] = thresholds_g[qm]
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 # note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
 condition = iCalibrationDB.Conditions.Dark.AGIPD(
    memory_cells=mem_cells,
    bias_voltage=bias_voltage,
    acquisition_rate=acq_rate,
    gain_setting=gain_setting,
    gain_mode=fixed_gain_mode,
    integration_time=integration_time
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
 qm_dict = OrderedDict()
 all_pdus = get_pdu_from_db(
    karabo_id,
    karabo_da,
    constant=iCalibrationDB.CalibrationConstant(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time.isoformat() if creation_time else None,
    timeout=cal_db_timeout
 )
 for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
    qm = module_index_to_qm(module_index)
    qm_dict[qm] = {
        "karabo_da": module_da,
        "db_module": module_pdu
    }
 ```

 %% Cell type:markdown id: tags:

 ## Sending calibration constants to the database.

 %% Cell type:code id: tags:

 ``` python
 md = None

 for qm in res:
    db_module = qm_dict[qm]["db_module"]
    for const in res[qm]:
        dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
        dconst.data = res[qm][const]

        if db_output:
            md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
                            report, cal_db_interface, creation_time=creation_time,
                            timeout=cal_db_timeout)

        if local_output:
            md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
                                  file_loc, report, creation_time, out_folder)
            print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")

 print("Constants parameter conditions are:\n")
 print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
      f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
      f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
      f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving previous calibration constants for comparison.

 %% Cell type:code id: tags:

 ``` python
 # Start retrieving existing constants for comparison
 # Pair every module (qm) with each of its own constants. The outer loop
 # binds `qm` itself, so the list does not rely on a `qm` left over from an
 # earlier loop in the notebook.
 qm_x_const = [(qm, const) for qm in res for const in res[qm]]


 def retrieve_old_constant(qm, const):
    """Retrieve a previously stored constant for one module, for comparison.

    :param qm: Module name, used as key into `qm_dict` to get its karabo_da.
    :param const: Name of the constant class in `iCalibrationDB.Constants.AGIPD`.
    :return: Tuple of (data, timestamp, filepath, h5path). If no data or
        metadata is returned by the database, timestamp is "Not found" and
        filepath and h5path are None.
    """
    dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()

    # Query one second before `creation_time` (if set), with the
    # "pdu_prior_in_time" strategy.
    data, mdata = get_from_db(
        karabo_id=karabo_id,
        karabo_da=qm_dict[qm]["karabo_da"],
        constant=dconst,
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
        strategy="pdu_prior_in_time",
        verbosity=1,
        timeout=cal_db_timeout
    )

    if mdata is None or data is None:
        timestamp = "Not found"
        filepath = None
        h5path = None
    else:
        timestamp = mdata.calibration_constant_version.begin_at.isoformat()
        # Location of the file holding the constant and the path inside it.
        filepath = os.path.join(
            mdata.calibration_constant_version.hdf5path,
            mdata.calibration_constant_version.filename
        )
        h5path = mdata.calibration_constant_version.h5path

    return data, timestamp, filepath, h5path


 old_retrieval_pool = multiprocessing.Pool()
 old_retrieval_res = old_retrieval_pool.starmap_async(
    retrieve_old_constant, qm_x_const
 )
 old_retrieval_pool.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 mnames=[]
 for i in modules:
    qm = module_index_to_qm(i)
    mnames.append(qm)
    display(Markdown(f'## Position of the module {qm} and its ASICs'))
 show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single-cell overviews make it possible to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 0
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 1
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 2
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:code id: tags:

 ``` python
 if high_res_badpix_3d:
    cols = {
        BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
        BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
    }

    display(Markdown("""

    ## Global Bad Pixel Behaviour ##

    The following plots show the results of bad pixel evaluation for all evaluated memory cells.
    Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2.
    This excludes single bad pixels present only in disconnected pixels.
    Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
    Colors encode the bad pixel type, or mixed type.

    """))

    gnames = ['High Gain', 'Medium Gain', 'Low Gain']
    for gain in range(3):
        display(Markdown(f'### {gnames[gain]} ###'))
        for mod, data in badpix_g.items():
            plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
            plt.show()
 ```

 %% Cell type:markdown id: tags:


 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    # Plot only three gain threshold maps.
    bp_thresh = OrderedDict()
    for mod, con in badpix_g.items():
        bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
        bp_thresh[mod][...,:2] = con[...,:2]
        bp_thresh[mod][...,2:] = con

    create_constant_overview(thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
                             badpixels=[bp_thresh, np.nan],
                             gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
                             marker=['d','d','','','']
                             )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Convert each bad pixel bit mask into a 0.0/1.0 map (1.0 = any bit set).
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
    # Builtin bool/float are used because the np.bool / np.float aliases
    # were deprecated in NumPy 1.20 and removed in NumPy 1.24.
    bad_pixel_aggregate_g[m] = d.astype(bool).astype(float)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables ##

 The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.

 %% Cell type:code id: tags:

 ``` python
 # now we need the old constants
 old_const = {}
 old_mdata = {}
 old_retrieval_res.wait()

 for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
    old_const.setdefault(qm, {})[const] = data
    old_mdata.setdefault(qm, {})[const] = {
        "timestamp": timestamp,
        "filepath": filepath,
        "h5path": h5path
    }
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown("The following pre-existing constants are used for comparison:"))
 for qm, consts in old_mdata.items():
    display(Markdown(f"- {qm}"))
    for const in consts:
        display(Markdown(f"    - {const} at {consts[const]['timestamp']}"))
    # saving locations of old constants for summary notebook
    with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
        yaml.safe_dump(
            {
                "module": qm,
                "pdu": qm_dict[qm]["db_module"],
                "old-constants": old_mdata[qm]
            },
            fd,
        )
 ```

 %% Cell type:code id: tags:

 ``` python
 table = []
 gain_names = ['High', 'Medium', 'Low']
 bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
 for qm in badpix_g.keys():
    for gain in range(3):
        l_data = []
        l_data_old = []

        data = np.copy(badpix_g[qm][:,:,:,gain])
        datau32 = data.astype(np.uint32)
        l_data.append(len(datau32[datau32>0].flatten()))
        for bit in bits:
            l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))

        if old_const[qm]['BadPixelsDark'] is not None:
            dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
            datau32old = dataold.astype(np.uint32)
            l_data_old.append(len(datau32old[datau32old>0].flatten()))
            for bit in bits:
                l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))

        l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
                       'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']

        l_threshold = ['', f'{thresholds_noise_sigma}' f'{thresholds_noise_hard[gain]}',
                       f'{thresholds_offset_sigma}' f'{thresholds_offset_hard[gain]}',
                       '', f'{thresholds_gain_sigma}']

        for i in range(len(l_data)):
            line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]

            if old_const[qm]['BadPixelsDark'] is not None:
                line += [l_data_old[i]]
            else:
                line += ['-']

            table.append(line)
        table.append(['', '', '', ''])

 display(Markdown('''
 ### Number of bad pixels

 One pixel can be bad for different reasons, therefore, the sum of all types of bad pixels can be more than the number of all bad pixels.

 '''))
 if len(table)>0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                         headers=["Pixel type", "Threshold",
                                                  "New constant", "Old constant"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 header = ['Parameter',
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant "]

 if fixed_gain_mode:
    constants = ['Offset', 'Noise']
 else:
    constants = ['Offset', 'Noise', 'ThresholdsDark']

 constants_x_qms = list(itertools.product(constants, res.keys()))


 def compute_table(const, qm):
    """Build the summary statistics table of one constant for one module.

    Pixels flagged in 'BadPixelsDark' are set to NaN first, so the statistics
    (median, mean, std, min, max) cover good pixels only. If both the old
    constant and the old 'BadPixelsDark' are available, the same statistics
    of the old constant are added next to the new ones; otherwise "-".

    :param const: Constant name, a key of `res[qm]`.
    :param qm: Module name, a key of `res` and `old_const`.
    :return: List of table rows; the first row holds the column group names.
    """
    if const == 'ThresholdsDark':
        table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
    else:
        table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]

    compare_with_old_constant = old_const[qm][const] is not None and \
        old_const[qm]['BadPixelsDark'] is not None

    data = np.copy(res[qm][const])

    if const == 'ThresholdsDark':
        data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
        data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
    else:
        data[res[qm]['BadPixelsDark']>0] = np.nan

    if compare_with_old_constant:
        data_old = np.copy(old_const[qm][const])
        if const == 'ThresholdsDark':
            data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
            data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
        else:
            data_old[old_const[qm]['BadPixelsDark']>0] = np.nan

    f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
    n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']

    def compute_row(i):
        line = [n_list[i]]
        for gain in range(3):
            # Compare only 3 threshold gain-maps
            if gain == 2 and const == 'ThresholdsDark':
                continue
            stat_measure = f_list[i](data[...,gain])
            line.append(f"{stat_measure:6.1f}")
            if compare_with_old_constant:
                old_stat_measure = f_list[i](data_old[...,gain])
                line.append(f"{old_stat_measure:6.1f}")
            else:
                line.append("-")
        return line


    # Share the CPUs between the concurrently computed tables, but keep at
    # least one thread: the integer division yields 0 when there are more
    # (constant, module) pairs than CPUs, and ThreadPool rejects 0 processes.
    num_threads = max(1, multiprocessing.cpu_count() // len(constants_x_qms))
    with multiprocessing.pool.ThreadPool(processes=num_threads) as pool:
        rows = pool.map(compute_row, range(len(f_list)))

    table.extend(rows)

    return table


 with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
    tables = pool.starmap(compute_table, constants_x_qms)

 for (const, qm), table in zip(constants_x_qms, tables):
    display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
 ```

 %% Cell type:markdown id: tags:

 # AGIPD Characterize Dark Images #

 Author: European XFEL Detector Group, Version: 2.0

 The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.

 The evaluated calibration constants are stored locally and injected into the calibration database.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
 out_folder = "" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 modules = [-1]  # list of modules to evaluate, RANGE ALLOWED
 run_high = 9985 # run number in which high gain data was recorded, required
 run_med = 9984 # run number in which medium gain data was recorded, required
 run_low = 9983 # run number in which low gain data was recorded, required
 operation_mode = "ADAPTIVE_GAIN"  # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")

 karabo_id = "HED_DET_AGIPD500K2G" # karabo_id of the detector
 karabo_da = ['-1']  # a list of data aggregator names, default ['-1'] selects all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo_id of the control device

 use_dir_creation_date = True  # use dir creation date as data production reference date
 cal_db_interface = "tcp://max-exfl-cal001:8020" # the database interface to use
 cal_db_timeout = 3000000 # timeout on caldb requests
 local_output = True # output constants locally
 db_output = False # output constants to database
+sort_runs = True  # Sort the selected dark runs. This flag is added for old data (e.g. 900174 r0011).

 mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode, use -1 to use value stored in slow data.
 integration_time = -1 # integration time, negative values for auto-detection.
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017

 thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
-thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backcompatability. Default [0, 0] to take the following parameters.
+thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_offset_hard_hg = [3000, 7000]  # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_mg = [6000, 10000]  # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_lg = [6000, 10000]  # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_hg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_hg, but for fixed gain operation
 thresholds_offset_hard_mg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_mg, but for fixed gain operation
 thresholds_offset_hard_lg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_lg, but for fixed gain operation

 thresholds_noise_sigma = 5. # noise sigma thresholds for offset deduced bad pixels
-thresholds_noise_hard = [0, 0] # For setting the same threshold noise for the 3 gains. Left for backcompatability. Default [0, 0] to take the following parameters.
+thresholds_noise_hard = [0, 0] # For setting the same threshold noise for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_gain_sigma = 5.  # Gain separation sigma threshold
-max_trains = 550  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarely avoid memory issues.
-min_trains = 1  # Miniumum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.
+max_trains = 550  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarily avoid memory issues.
+min_trains = 1  # Minimum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells

 # This is used if modules is not specified:
 def find_modules(in_folder, run_high, modules):
    """Return the module indices to evaluate.

    An explicit ``modules`` value (anything other than ``None`` or ``[-1]``)
    is returned unchanged. Otherwise the directory ``<in_folder>/r<run_high>``
    (run number zero-padded to four digits) is listed, and the two-digit
    numbers found in ``-AGIPDnn-`` parts of the file names are returned as a
    sorted list of ints without duplicates.
    """
    import re
    from pathlib import Path

    if modules is not None and modules != [-1]:
        return modules

    pattern = re.compile(r'-AGIPD(\d{2})-')
    run_dir = Path(in_folder, f'r{run_high:04d}')
    hits = (pattern.search(entry.name) for entry in run_dir.iterdir())
    return sorted({int(hit.group(1)) for hit in hits if hit})
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import multiprocessing
 import os
 from collections import OrderedDict
 from datetime import timedelta
 from pathlib import Path
-from typing import List, Tuple
+from typing import Tuple

-import dateutil.parser
 import matplotlib
 import numpy as np
 import pasha as psh
 import tabulate
 import yaml
 from IPython.display import Latex, Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')

 import iCalibrationDB
 import matplotlib.pyplot as plt
-from cal_tools.agipdlib import AgipdCtrl
-from cal_tools.enums import AgipdGainMode, BadPixels
+from cal_tools.agipdlib import AgipdCtrlRuns
+from cal_tools.enums import BadPixels
 from cal_tools.plotting import (
    create_constant_overview,
    plot_badpix_3d,
    show_overview,
    show_processed_modules,
 )
 from cal_tools.tools import (
    get_dir_creation_date,
    get_from_db,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
-    map_gain_stages,
    module_index_to_qm,
    run_prop_seq_from_path,
    save_const_to_h5,
    send_to_db,
 )

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # insert control device if format string (does nothing otherwise)
 ctrl_src = ctrl_source_template.format(karabo_id_control)

-runs_dict = OrderedDict()
 run_numbers = [run_high, run_med, run_low]

-for gain_idx, (run_name, run_number) in enumerate(zip(["high", "med", "low"], run_numbers)):
-    runs_dict[run_name] = {
-        "number": run_number,
-        "gain": gain_idx,
-        "dc": RunDirectory(f'{in_folder}/r{run_number:04d}/')
-    }
-
 creation_time=None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)

 print(f"Using {creation_time} as creation time of constant.")

 run, prop, seq = run_prop_seq_from_path(in_folder)

 # Read report path and create file location tuple to add with the injection
 file_loc = f"proposal:{prop} runs:{run_low} {run_med} {run_high}"

 report = get_report(metadata_folder)
 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 # The instrument name is the first "_"-separated part of karabo_id.
 instrument = karabo_id.split("_")[0]

 # Map the instrument to the detector instance name and its number of modules.
 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8
 else:
    # Without this branch dinstance/nmods stay undefined and the notebook
    # fails further down with an unrelated NameError.
    raise ValueError(
        f"Unsupported instrument '{instrument}' derived from karabo_id "
        f"'{karabo_id}'. Expected one of: SPB, MID, HED.")

 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 def create_karabo_da_list(modules):
    """Return the data aggregator names ``AGIPDnn`` for the given module indices.

    Each index is formatted as an integer zero-padded to at least two digits.
    """
    return [f"AGIPD{module:02d}" for module in modules]

 # Keep `modules` and `karabo_da` in sync: explicit aggregator names win,
 # otherwise the aggregator names are built from the module list.
 if karabo_da[0] != '-1':
    # The module index is the trailing two characters of each aggregator name.
    modules = [int(da_name[-2:]) for da_name in karabo_da]
 else:
    if modules[0] == -1:
        # Neither was specified: take every module of the detector.
        modules = list(range(nmods))
    karabo_da = create_karabo_da_list(modules)

 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 print(f"Detector instance {dinstance}")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create out_folder if it doesn't exist.
 Path(out_folder).mkdir(parents=True, exist_ok=True)

 # Estimated image data size per module and run, summed up and printed below.
 mod_image_size = []
-for run_dict in runs_dict.values():
+for run in run_numbers:
    missing_modules = []  # modules with no images within a run.
    n_trains_list = []   # list of the number of trains for each module within a run.
    # This is important in case of no slurm parallelization over modules is done.
    # (e.g. running notebook interactively)
    for m in modules:
        # validate that there are trains for the selected modules and run.
-        dc = run_dict["dc"].select(
+        dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(
            instrument_src.format(m), "*", require_all=True)
        n_trains = len(dc.train_ids)

        if n_trains == 0:
-            print(f"WARNING: No images for module AGIPD{m:02d}, run {run_dict['number']}.")
+            print(f"WARNING: No images for module AGIPD{m:02d}, run {run}.")
            missing_modules.append(m)
        # Raise a warning if the module has less trains than expected.
        elif n_trains < min_trains:
-            print(f"WARNING: AGIPD{m:02d}, run {run_dict['number']} "
+            print(f"WARNING: AGIPD{m:02d}, run {run} "
                  f"has trains less than minimum trains: {min_trains}.")
        else:
            # max_trains == 0 means "use all available trains", so report
            # n_trains in that case instead of 0.
            print(f"Processing {min(max_trains, n_trains) if max_trains else n_trains} "
-                  f"for AGIPD{m:02d}, run {run_dict['number']} ")
+                  f"for AGIPD{m:02d}, run {run} ")

        n_trains_list.append(n_trains)
        # Number of image values * 2 / 1e9 (presumably 2 bytes per value, in GB).
        # np.prod is used because the np.product alias was removed in NumPy 2.0.
        mod_image_size.append(np.prod(dc[instrument_src.format(m), "image.data"].shape) * 2  / 1e9)

    if max(n_trains_list) == 0:
-        raise ValueError(f"No images to process for run: {run_dict['number']}")
+        raise ValueError(f"No images to process for run: {run}")
    elif max(n_trains_list) < min_trains:
-        raise ValueError(f"{run_dict['number']} has less than minimum trains: {min_trains}")
+        raise ValueError(f"{run} has less than minimum trains: {min_trains}")

 # Update modules and karabo_da lists based on available modules to processes.
 modules = [m for m in modules if m not in missing_modules]
 karabo_da = create_karabo_da_list(modules)

 print(f"Will process data of ({sum(mod_image_size):.02f} GB).")
 ```

 %% Cell type:markdown id: tags:

 ## Read and validate the runs control data.

 %% Cell type:code id: tags:

 ``` python
-def read_run_conditions(runs_dict: dict):
-    agipd_cond = AgipdCtrl(
-        run_dc=runs_dict["dc"],
-        image_src=instrument_src_mod,
-        ctrl_src=ctrl_src,
-    )
-    cond_dict["runs"].append(runs_dict["number"])
-    if acq_rate == 0:
-        cond_dict["acq_rate"].append(agipd_cond.get_acq_rate())
-    if mem_cells == 0:
-        cond_dict["mem_cells"].append(agipd_cond.get_num_cells())
-    if gain_setting == -1:
-        cond_dict["gain_setting"].append(
-            agipd_cond.get_gain_setting(creation_time))
-    if bias_voltage == 0.:
-        cond_dict["bias_voltage"].append(
-            agipd_cond.get_bias_voltage(karabo_id_control))
-    if integration_time == -1:
-        cond_dict["integration_time"].append(
-            agipd_cond.get_integration_time())
-    if gain_mode == -1:
-        cond_dict["gain_mode"].append(agipd_cond.get_gain_mode())
-    else:
-        cond_dict["gain_mode"].append(AgipdGainMode(gain_mode))
-```
-
-%% Cell type:code id: tags:
-
-``` python
-def validate_gain_modes(gain_modes: List[AgipdGainMode]):
-    # Validate that gain modes are not a mix of adaptive and fixed gain.
-    if all(
-        gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
-    ):
-        fixed_gain_mode = False
-    # Some runs are adaptive by mistake.
-    elif any(
-        gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
-    ):
-        raise ValueError(
-            f"ERROR: Given runs {run_numbers}"
-            " have a mix of ADAPTIVE and FIXED gain modes: "
-            f"{gain_modes}."
-    )
-    elif list(gain_modes) == [
-        AgipdGainMode.FIXED_HIGH_GAIN,
-        AgipdGainMode.FIXED_MEDIUM_GAIN,
-        AgipdGainMode.FIXED_LOW_GAIN
-    ]:
-        fixed_gain_mode = True
-    else:
-        raise ValueError(
-        "ERROR: Wrong arrangment of given dark runs. "
-        f"Given runs' gain_modes are {gain_modes} for runs: {run_numbers}."
-    )
-    return fixed_gain_mode
-```
-
-%% Cell type:code id: tags:
-
-``` python
 # Read slow data from 1st channel only.
 # Read all modules in one notebook and validate the conditions across detectors?
 # Currently slurm jobs run per one module.

 # TODO: what if first module is not available. Maybe only channel 2 available
 instrument_src_mod = instrument_src.format(modules[0])

-cond_dict = dict()
-fixed_gain_mode = None
+agipd_ctrl_dark = AgipdCtrlRuns(
+    raw_folder=in_folder,
+    runs=run_numbers,
+    image_src=instrument_src_mod,
+    ctrl_src=ctrl_src,
+    sort_dark_runs_enabled=sort_runs
+)
+# Update run_numbers list in case it was sorted.
+run_numbers = agipd_ctrl_dark.runs
+if mem_cells == 0:
+    mem_cells = agipd_ctrl_dark.get_memory_cells()
+
+if acq_rate == 0:
+    acq_rate = agipd_ctrl_dark.get_acq_rate()
+
+if bias_voltage == 0:
+    bias_voltage = agipd_ctrl_dark.get_bias_voltage(karabo_id_control)
+
+fixed_gain_mode = False
+if gain_mode == -1:
+    gain_mode = agipd_ctrl_dark.gain_modes
+    fixed_gain_mode = agipd_ctrl_dark.fixed_gain_mode()
+
+if gain_setting == -1:
+    gain_setting = agipd_ctrl_dark.get_gain_setting()

-with multiprocessing.Manager() as manager:
-    cond_dict["runs"] = manager.list()
-    cond_dict["acq_rate"] = manager.list()
-    cond_dict["mem_cells"] = manager.list()
-    cond_dict["gain_setting"] = manager.list()
-    cond_dict["gain_mode"] = manager.list()
-    cond_dict["bias_voltage"] = manager.list()
-    cond_dict["integration_time"] = manager.list()
-
-    with multiprocessing.Pool(processes=len(modules)) as pool:
-        pool.starmap(read_run_conditions, zip(runs_dict.values()))
-
-    for cond, vlist in cond_dict.items():
-        if cond == "runs":
-            continue
-        elif cond == "gain_mode":
-            fixed_gain_mode = validate_gain_modes(cond_dict["gain_mode"])
-        elif not all(x == vlist[0] for x in vlist):
-            # TODO: raise ERROR??
-            print(
-                f"WARNING: {cond} is not the same for the runs "
-                f"{cond_dict['runs']} with values"
-                f" of {cond_dict[cond]}, respectively."
-            )
-    if cond_dict["acq_rate"]: acq_rate = cond_dict["acq_rate"][0]
-    if cond_dict["mem_cells"]: mem_cells = cond_dict["mem_cells"][0]
-    if cond_dict["gain_setting"]: gain_setting = cond_dict["gain_setting"][0]
-    if cond_dict["gain_mode"]: gain_mode = list(cond_dict["gain_mode"])
-    if cond_dict["bias_voltage"]: bias_voltage = cond_dict["bias_voltage"][0]
-    if cond_dict["integration_time"]: integration_time = cond_dict["integration_time"][0]
+if integration_time == -1:
+    integration_time = agipd_ctrl_dark.get_integration_time()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Determine the gain operation mode based on the gain_mode stored in control h5file.
 # fixed_gain_mode decides how the data is processed; the operation_mode
 # parameter is only cross-checked here and mismatches are reported.
 if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
    print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")

 if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
    # Note the trailing space after "but": the two literals are concatenated.
    print(
        "WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but "
        "slow data indicates FIXED_GAIN. Processing fixed gain constants.")
 elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
    print(
        "WARNING: Operation_mode parameter is FIXED_GAIN, "
        "slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Parameters are:")
 print(f"Proposal: {prop}")
 print(f"Acquisition rate: {acq_rate}")
 print(f"Memory cells: {mem_cells}")
 print(f"Runs: {run_numbers}")
 print(f"Interlaced mode: {interlaced}")
 print(f"Using DB: {db_output}")
 print(f"Input: {in_folder}")
 print(f"Output: {out_folder}")
 print(f"Bias voltage: {bias_voltage}V")
 print(f"Gain setting: {gain_setting}")
 print(f"Integration time: {integration_time}")
 print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
 ```

 %% Cell type:code id: tags:

 ``` python
 if thresholds_offset_hard != [0, 0]:
    # if set, this will override the individual parameters
    thresholds_offset_hard = [thresholds_offset_hard] * 3
 elif fixed_gain_mode:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg_fixed,
        thresholds_offset_hard_mg_fixed,
        thresholds_offset_hard_lg_fixed,
    ]
 else:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg,
        thresholds_offset_hard_mg,
        thresholds_offset_hard_lg,
    ]
 print("Will use the following hard offset thresholds")
 for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
    print(f"- {name} gain: {value}")

 if thresholds_noise_hard != [0, 0]:
    thresholds_noise_hard = [thresholds_noise_hard] * 3
 else:
    thresholds_noise_hard = [
        thresholds_noise_hard_hg,
        thresholds_noise_hard_mg,
        thresholds_noise_hard_lg,
    ]
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 parallel_num_procs = min(6, len(modules)*3)
 parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
 print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")

 def characterize_module(
-    channel: int, runs_dict: dict,
+    channel: int, gain_run: Tuple[int, int],
 ) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:
    """Characterize the dark data of one module for one dark run / gain stage.

    For every memory cell found in the data, the per-pixel median (offset)
    and standard deviation (noise) of the image values are computed. Unless
    running in fixed gain mode, the same statistics are computed for the
    gain values. Bad pixels are then flagged from sigma-based and hard
    thresholds on offset and noise, and from non-finite results.

    Args:
        channel: Module index, inserted into ``instrument_src``.
        (second argument): Identifies the gain stage index and the dark run
            to read.

    Returns:
        ``(channel, gain_index, offset, noise, gains, gains_std, bp)``.
        ``gains`` and ``gains_std`` are ``None`` in fixed gain mode; ``bp`` is
        a ``uint32`` array of ``BadPixels`` flags shaped like ``offset``.

    Raises:
        ValueError: If a cell ID in the data is not below ``mem_cells``.
    """

+    gain_index, run = gain_run
    # Select the corresponding module channel.
    instrument_src_mod = instrument_src.format(channel)

-    run_dc = runs_dict["dc"].select(instrument_src_mod, require_all=True)
+    run_dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(instrument_src_mod, require_all=True)
    if max_trains != 0:
        run_dc = run_dc.select_trains(np.s_[:max_trains])
-    gain_index = runs_dict["gain"]

    # Read module's image and cellId data.
    im = run_dc[instrument_src_mod, "image.data"].ndarray()
    cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())

    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]

    # Split the raw array into image values (im) and, outside fixed gain
    # mode, gain values (ga). Interlaced data alternates both along the
    # first axis; otherwise they are index 0 and 1 of the second axis.
    if interlaced:
        if not fixed_gain_mode:
            ga = im[1::2, 0, ...]
        im = im[0::2, 0, ...].astype(np.float32)
        cell_ids = cell_ids[::2]
    else:
        if not fixed_gain_mode:
            ga = im[:, 1, ...]
        im = im[:, 0, ...].astype(np.float32)
    # After the transpose the image index is the last axis.
    im = np.transpose(im)
    if not fixed_gain_mode:
        ga = np.transpose(ga)

    context = psh.context.ThreadContext(num_workers=parallel_num_threads)
    offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
    noise = context.alloc(like=offset)

    if fixed_gain_mode:
        gains = None
        gains_std = None
    else:
        gains = context.alloc(like=offset)
        gains_std = context.alloc(like=offset)

    def process_cell(worker_id, array_index, cell_number):
        # Statistics over all images recorded with this memory cell.
        cell_slice_index = (cell_ids == cell_number)
        im_slice = im[..., cell_slice_index]
        offset[..., cell_number] = np.median(im_slice, axis=2)
        noise[..., cell_number] = np.std(im_slice, axis=2)
        if not fixed_gain_mode:
            ga_slice = ga[..., cell_slice_index]
            gains[..., cell_number] = np.median(ga_slice, axis=2)
            gains_std[..., cell_number] = np.std(ga_slice, axis=2)
    unique_cell_ids = np.unique(cell_ids)

    # We assume cells are accepted starting 0.
    # Valid cell IDs are therefore 0 .. mem_cells - 1; an ID equal to
    # mem_cells would index past the last axis of the arrays allocated above.
    if np.any(unique_cell_ids >= mem_cells):
        raise ValueError(
            f"Invalid cells found {unique_cell_ids} "
            f"for run: {run_dc.run_metadata()['runNumber']}.")

    context.map(process_cell, unique_cell_ids)

    # bad pixels
    bp = np.zeros_like(offset, dtype=np.uint32)
    # offset related bad pixels
    # (median and std are taken over the two pixel axes, i.e. per memory cell)
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[(offset < local_thresholds_offset_hard[0]) |
       (offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))
    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    return channel, gain_index, offset, noise, gains, gains_std, bp
 ```

 %% Cell type:code id: tags:

 ``` python
 with multiprocessing.Pool(processes=parallel_num_procs) as pool:
    results = pool.starmap(
-        characterize_module, itertools.product(modules, list(runs_dict.values())))
+        characterize_module, itertools.product(modules, list(enumerate(run_numbers))))

 # mapped values for processing 2 modules example:
-# [
-#     0, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
-#     0, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
-#     0, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
-#     1, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
-#     1, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
-#     1, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
+# [(0, (0, 9013))
+#     0, (0, run-high),
+#     0, (1, run-med),
+#     0, (2, run-low),
+#     1, (0, run-high),
+#     1, (1, run-med),
+#     1, (2, run-low),,
 # ]
 ```

 %% Cell type:code id: tags:

 ``` python
 # One entry per module, keyed by the name from module_index_to_qm. Each array
 # gets an extra last axis of length 3 that is indexed by gain_index.
 offset_g, noise_g, badpix_g = OrderedDict(), OrderedDict(), OrderedDict()
 if not fixed_gain_mode:
    gain_g, gainstd_g = OrderedDict(), OrderedDict()


 for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
    qm = module_index_to_qm(module_index)
    if qm not in offset_g:
        # First result for this module: allocate its containers.
        stacked = np.zeros((*offset.shape[:3], 3))
        offset_g[qm] = stacked
        noise_g[qm] = np.zeros_like(stacked)
        badpix_g[qm] = np.zeros_like(stacked, np.uint32)
        if not fixed_gain_mode:
            gain_g[qm] = np.zeros_like(stacked)
            gainstd_g[qm] = np.zeros_like(stacked)

    # Store this result in the slot of its gain index.
    gain_slot = (..., gain_index)
    offset_g[qm][gain_slot] = offset
    noise_g[qm][gain_slot] = noise
    badpix_g[qm][gain_slot] = bp
    if not fixed_gain_mode:
        gain_g[qm][gain_slot] = gains
        gainstd_g[qm][gain_slot] = gains_std
 ```

 %% Cell type:code id: tags:

 ``` python
 # Add bad pixels due to bad gain separation
 # (skipped in fixed gain mode, where no gain values were evaluated).
 if not fixed_gain_mode:
    for qm in gain_g.keys():
        # g and g+1 index the two pairs of adjacent gain stages on the last axis.
        for g in range(2):
            # Bad pixels during bad gain separation.
            # bad_sep is the difference between the gain values of stage g+1
            # and stage g, divided by their standard deviations added in
            # quadrature.
            bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
                np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
            # Entries with a separation below "thresholds_gain_sigma" are
            # flagged in the bad pixel map of stage g+1.
            badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
                BadPixels.GAIN_THRESHOLDING_ERROR
 ```

 %% Cell type:markdown id: tags:

 The thresholds for gain switching are then defined as the mean value between the individual gain bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    # Last axis of each thresholds array holds five entries: two switching
    # thresholds followed by the three gain bit levels.
    thresholds_g = {}
    for qm, gain_levels in gain_g.items():
        thresholds = np.zeros((*gain_levels.shape[:3], 5))
        # Switching thresholds: mean of two adjacent gain bit levels.
        thresholds[..., 0] = (gain_levels[..., 1] + gain_levels[..., 0]) / 2
        thresholds[..., 1] = (gain_levels[..., 2] + gain_levels[..., 1]) / 2
        # The remaining three slots carry the gain bit levels themselves.
        thresholds[..., 2:] = gain_levels[..., :3]
        thresholds_g[qm] = thresholds
 ```

 %% Cell type:code id: tags:

 ``` python
 # Collect, per module, the evaluated arrays under their constant names.
 res = OrderedDict()
 for i in modules:
    qm = module_index_to_qm(i)
    module_constants = dict(
        Offset=offset_g[qm],
        Noise=noise_g[qm],
        BadPixelsDark=badpix_g[qm],
    )
    # Thresholds exist only when gain values were evaluated.
    if not fixed_gain_mode:
        module_constants['ThresholdsDark'] = thresholds_g[qm]
    res[qm] = module_constants
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 # note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
 condition = iCalibrationDB.Conditions.Dark.AGIPD(
    memory_cells=mem_cells,
    bias_voltage=bias_voltage,
    acquisition_rate=acq_rate,
    gain_setting=gain_setting,
    gain_mode=fixed_gain_mode,
    integration_time=integration_time
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
 qm_dict = OrderedDict()
 all_pdus = get_pdu_from_db(
    karabo_id,
    karabo_da,
    constant=iCalibrationDB.CalibrationConstant(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time.isoformat() if creation_time else None,
    timeout=cal_db_timeout
 )
 for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
    qm = module_index_to_qm(module_index)
    qm_dict[qm] = {
        "karabo_da": module_da,
        "db_module": module_pdu
    }
 ```

 %% Cell type:markdown id: tags:

 ## Sending calibration constants to the database.

 %% Cell type:code id: tags:

 ``` python
 # Metadata returned by the last store/inject call; used for the summary below.
 md = None

 for qm in res:
    db_module = qm_dict[qm]["db_module"]
    for const in res[qm]:
        # Instantiate the constant class named like the key and attach the data.
        dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
        dconst.data = res[qm][const]

        if db_output:
            md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
                            report, cal_db_interface, creation_time=creation_time,
                            timeout=cal_db_timeout)

        if local_output:
            md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
                                  file_loc, report, creation_time, out_folder)
            # Report out_folder (the folder passed to save_const_to_h5), not
            # file_loc, which only names the proposal and runs.
            print(f"Calibration constant {const} for {qm} is stored locally in {out_folder}.\n")

 print("Constants parameter conditions are:\n")
 print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
      f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
      f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
      f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving previous calibration constants for comparison.

 %% Cell type:code id: tags:

 ``` python
 # Start retrieving existing constants for comparison
 # Modules are the outer loop so that `qm` is bound by the comprehension itself
 # before res[qm] is evaluated; the reversed clause order only worked through
 # a `qm` left over from an earlier loop.
 qm_x_const = [(qm, const) for qm in res for const in res[qm]]


 def retrieve_old_constant(qm, const):
    """Retrieve an existing version of one constant from the database.

    The constant class named ``const`` is queried for the data aggregator of
    module ``qm`` under the current ``condition``. The requested creation
    time is one second before ``creation_time``; ``None`` is passed when
    ``creation_time`` is not set.

    Returns a tuple ``(data, timestamp, filepath, h5path)``. If the data or
    its metadata is missing, ``timestamp`` is ``"Not found"`` and
    ``filepath`` and ``h5path`` are ``None``.
    """
    dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()

    reference_time = None
    if creation_time:
        reference_time = creation_time - timedelta(seconds=1)

    data, mdata = get_from_db(
        karabo_id=karabo_id,
        karabo_da=qm_dict[qm]["karabo_da"],
        constant=dconst,
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=reference_time,
        strategy="pdu_prior_in_time",
        verbosity=1,
        timeout=cal_db_timeout
    )

    # Nothing usable came back: report placeholders instead of file details.
    if mdata is None or data is None:
        return data, "Not found", None, None

    version = mdata.calibration_constant_version
    timestamp = version.begin_at.isoformat()
    filepath = os.path.join(version.hdf5path, version.filename)
    return data, timestamp, filepath, version.h5path


 old_retrieval_pool = multiprocessing.Pool()
 old_retrieval_res = old_retrieval_pool.starmap_async(
    retrieve_old_constant, qm_x_const
 )
 old_retrieval_pool.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 mnames=[]
 for i in modules:
    qm = module_index_to_qm(i)
    mnames.append(qm)
    display(Markdown(f'## Position of the module {qm} and its ASICs'))
 show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single-cell overviews make it possible to identify potential effects that are common to all memory cells, e.g. on the sensor level. Additionally, they serve as a first sanity check of the expected behaviour, e.g. whether structuring on the ASIC level is visible in the offsets while no other immediate artifacts are visible.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 0
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 1
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 2
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:code id: tags:

 ``` python
 if high_res_badpix_3d:
    cols = {
        BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
        BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
    }

    display(Markdown("""

    ## Global Bad Pixel Behaviour ##

    The following plots show the results of bad pixel evaluation for all evaluated memory cells.
    Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2.
    This excludes single bad pixels present only in disconnected pixels.
    Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
    Colors encode the bad pixel type, or mixed type.

    """))

    gnames = ['High Gain', 'Medium Gain', 'Low Gain']
    for gain in range(3):
        display(Markdown(f'### {gnames[gain]} ###'))
        for mod, data in badpix_g.items():
            plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
            plt.show()
 ```

 %% Cell type:markdown id: tags:


 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    # The threshold constant is passed with five maps along its last axis while the
    # bad pixel arrays are assigned as three maps. Build a five-map bad pixel array:
    # maps 0-1 reuse the first two bad pixel maps, maps 2-4 take all three.
    bp_thresh = OrderedDict()
    for mod, con in badpix_g.items():
        expanded = np.zeros((*con.shape[:3], 5), dtype=con.dtype)
        expanded[..., :2] = con[..., :2]
        expanded[..., 2:] = con
        bp_thresh[mod] = expanded

    threshold_map_names = [
        'HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain',
    ]
    threshold_markers = ['d', 'd', '', '', '']
    create_constant_overview(
        thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
        badpixels=[bp_thresh, np.nan],
        gmap=threshold_map_names,
        marker=threshold_markers,
    )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Reduce the bad pixel bit masks to a 0.0/1.0 flag per pixel; the result is
 # shown as "Bad pixel fraction".
 # The builtin `bool` and `float` are used as dtypes because the `np.bool` and
 # `np.float` aliases were deprecated in NumPy 1.20 and removed in NumPy 1.24,
 # where `np.float` raises AttributeError.
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(bool).astype(float)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables ##

 The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.

 %% Cell type:code id: tags:

 ``` python
 # now we need the old constants
 # old_const: module -> constant name -> data of the pre-existing constant.
 # old_mdata: module -> constant name -> where that constant came from.
 old_const = {}
 old_mdata = {}
 # Block until the retrieval started earlier in the notebook has finished.
 # NOTE(review): presumably a multiprocessing AsyncResult - confirm where
 # old_retrieval_res is created.
 old_retrieval_res.wait()

 # Results are paired with qm_x_const by position, so both must be in the same order.
 for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
    old_const.setdefault(qm, {})[const] = data
    old_mdata.setdefault(qm, {})[const] = {
        "timestamp": timestamp,
        "filepath": filepath,
        "h5path": h5path
    }
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown("The following pre-existing constants are used for comparison:"))
 for qm, consts in old_mdata.items():
    # One bullet per module, with one nested bullet per constant.
    display(Markdown(f"- {qm}"))
    for const, const_mdata in consts.items():
        display(Markdown(f"    - {const} at {const_mdata['timestamp']}"))

    # Store the locations of the old constants per module for the summary notebook.
    with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
        module_summary = {
            "module": qm,
            "pdu": qm_dict[qm]["db_module"],
            "old-constants": old_mdata[qm],
        }
        yaml.safe_dump(module_summary, fd)
 ```

 %% Cell type:code id: tags:

 ``` python
 table = []
 gain_names = ['High', 'Medium', 'Low']
 bits = [
    BadPixels.NOISE_OUT_OF_THRESHOLD,
    BadPixels.OFFSET_OUT_OF_THRESHOLD,
    BadPixels.OFFSET_NOISE_EVAL_ERROR,
    BadPixels.GAIN_THRESHOLDING_ERROR,
 ]
 # Row labels: the total first, then one row per entry of `bits` in the same order.
 l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
                'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']

 for qm in badpix_g.keys():
    for gain in range(3):
        # Counts for the new constant: all flagged pixels, then pixels per bit.
        data = np.copy(badpix_g[qm][:, :, :, gain])
        datau32 = data.astype(np.uint32)
        l_data = [np.count_nonzero(datau32)]
        for bit in bits:
            l_data.append(np.count_nonzero(badpix_g[qm][:, :, :, gain] & bit))

        # The same counts for the old constant, if one was retrieved.
        l_data_old = []
        has_old_badpix = old_const[qm]['BadPixelsDark'] is not None
        if has_old_badpix:
            dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
            datau32old = dataold.astype(np.uint32)
            l_data_old.append(np.count_nonzero(datau32old))
            for bit in bits:
                l_data_old.append(
                    np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))

        # Sigma and hard thresholds are written back to back, without a separator.
        l_threshold = [
            '',
            f'{thresholds_noise_sigma}{thresholds_noise_hard[gain]}',
            f'{thresholds_offset_sigma}{thresholds_offset_hard[gain]}',
            '',
            f'{thresholds_gain_sigma}',
        ]

        for i, new_count in enumerate(l_data):
            old_count = l_data_old[i] if has_old_badpix else '-'
            line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i],
                    new_count, old_count]
            table.append(line)
        # Empty row separating the gain stages.
        table.append(['', '', '', ''])

 display(Markdown('''
 ### Number of bad pixels

 One pixel can be bad for different reasons, therefore, the sum of all types of bad pixels can be more than the number of all bad pixels.

 '''))
 if table:
    md = display(Latex(tabulate.tabulate(
        table,
        tablefmt='latex',
        headers=["Pixel type", "Threshold", "New constant", "Old constant"],
    )))

 %% Cell type:code id: tags:

 ``` python
 # A label column followed by four "new / old" column pairs.
 header = ['Parameter'] + ["New constant", "Old constant "] * 4

 # ThresholdsDark is only tabulated when the gain mode is not fixed.
 constants = ['Offset', 'Noise']
 if not fixed_gain_mode:
    constants.append('ThresholdsDark')

 # Every (constant, module) combination gets its own table.
 constants_x_qms = list(itertools.product(constants, res.keys()))


 def compute_table(const, qm):
    """Build the summary statistics table of one constant for one module.

    Pixels flagged in the matching ``BadPixelsDark`` array are set to NaN first,
    so that the NaN-aware statistics only cover good pixels. If both the old
    constant and the old bad pixel map are available, the same statistics are
    computed for the old constant; otherwise "-" fills the old-constant columns.

    Reads the notebook globals ``res``, ``old_const`` and ``constants_x_qms``.

    :param const: Name of the constant ('Offset', 'Noise' or 'ThresholdsDark').
    :param qm: Module name, used as key into ``res`` and ``old_const``.
    :return: List of rows. The first row holds the column group labels, the
        remaining rows are Median, Mean, Std, Min and Max, formatted as strings.
    """
    if const == 'ThresholdsDark':
        table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
    else:
        table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]

    compare_with_old_constant = old_const[qm][const] is not None and \
        old_const[qm]['BadPixelsDark'] is not None

    data = np.copy(res[qm][const])

    if const == 'ThresholdsDark':
        # Only the two threshold maps are masked (and later compared).
        data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
        data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
    else:
        data[res[qm]['BadPixelsDark']>0] = np.nan

    if compare_with_old_constant:
        data_old = np.copy(old_const[qm][const])
        if const == 'ThresholdsDark':
            data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
            data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
        else:
            data_old[old_const[qm]['BadPixelsDark']>0] = np.nan

    f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
    n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']

    def compute_row(i):
        """Return the table row of statistic number ``i`` for all compared maps."""
        line = [n_list[i]]
        for gain in range(3):
            # Compare only 3 threshold gain-maps
            if gain == 2 and const == 'ThresholdsDark':
                continue
            stat_measure = f_list[i](data[...,gain])
            line.append(f"{stat_measure:6.1f}")
            if compare_with_old_constant:
                old_stat_measure = f_list[i](data_old[...,gain])
                line.append(f"{old_stat_measure:6.1f}")
            else:
                line.append("-")
        return line

    # The CPUs are split between the tables that are computed in parallel.
    # The integer division yields 0 as soon as there are more tables than CPUs,
    # which ThreadPool rejects with a ValueError, hence at least one thread.
    n_threads = max(1, multiprocessing.cpu_count() // max(1, len(constants_x_qms)))
    with multiprocessing.pool.ThreadPool(processes=n_threads) as pool:
        rows = pool.map(compute_row, range(len(f_list)))

    table.extend(rows)

    return table


 # Compute all tables in parallel, with one worker process per (constant, module) pair.
 with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
    tables = pool.starmap(compute_table, constants_x_qms)

 # starmap returns the results in input order, so they can be zipped with their inputs.
 for (const, qm), table in zip(constants_x_qms, tables):
    display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
 ```

--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw"  # the folder to read data from, required
-out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/remove"  # the folder to output to, required
-run = 91  # run to process, required
+in_folder = "/gpfs/exfel/exp/FXE/202301/p003279/raw"  # the folder to read data from, required
+out_folder =  "/gpfs/exfel/data/scratch/kluyvert/jf-corr-p3279-r275"  # the folder to output to, required
+run = 275  # run to process, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed
 sequences_per_node = 1  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel

 # Parameters used to access raw data.
-karabo_id = "SPB_IRDA_JF4M"  # karabo prefix of Jungfrau devices
-karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08']  # data aggregators
+karabo_id = "FXE_XAD_JF500K"  # karabo prefix of Jungfrau devices
+karabo_da = ['JNGFR03']  # data aggregators
 receiver_template = "JNGFR{:02d}"  # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)
 karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id

 # Parameters for calibration database.
 cal_db_interface = "tcp://max-exfl-cal001:8017#8025" # the database interface to use
 cal_db_timeout = 180000  # timeout on caldb requests
 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"

 # Parameters affecting corrected data.
 relative_gain = True  # do relative gain correction.
 strixel_sensor = False  # reordering for strixel detector layout.
 strixel_double_norm = 2.0  # normalization to use for double-size pixels, only applied for strixel sensors.
 limit_trains = 0  # ONLY FOR TESTING. process only first N trains, Use 0 to process all.
 chunks_ids = 32  # HDF chunk size for memoryCell and frameNumber.
 chunks_data = 1  # HDF chunk size for pixel data in number of frames.

 # Parameters for retrieving calibration constants
 manual_slow_data = False  # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
 integration_time = 4.96  # integration time in us, will be overwritten by value in file
 gain_setting = 0  # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
 gain_mode = 0  # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by value in file, if manual_slow_data is set to True.
 mem_cells = -1  # Set mem_cells to -1 to automatically use the value stored in RAW data.
 bias_voltage = 180  # will be overwritten by value in file

 # Parameters for plotting
 skip_plots = False  # exit after writing corrected files
 plot_trains = 500  # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
 cell_id_preview = 15  # cell Id used for preview in single-shot plots

 # Parameters for ROI selection and reduction
 roi_definitions = [-1]  # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
+roi_threshold = 2.  # Corrected pixels below the threshold will be excluded from ROI projections. Set to -1 to include all pixels.

 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    """Forward all arguments unchanged to ``xfel_calibrate.calibrate.balance_sequences``.

    :return: Whatever ``xfel_calibrate.calibrate.balance_sequences`` returns.
    """
    # NOTE(review): the import is kept inside the function, presumably because this
    # cell is evaluated on its own by xfel-calibrate - confirm before moving it.
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import fnmatch
 import multiprocessing
 import sys
 import warnings
 from logging import warning
 from pathlib import Path

 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pasha as psh
 import tabulate
 from IPython.display import Latex, Markdown, display
 from extra_data import DataCollection, H5File, RunDirectory, by_id, components
 from matplotlib.colors import LogNorm

 import cal_tools.restful_config as rest_cfg
 from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
 from cal_tools.jungfraulib import JungfrauCtrl
 from cal_tools.enums import BadPixels
 from cal_tools.jungfraulib import JungfrauCtrl
 from cal_tools.plotting import init_jungfrau_geom
 from cal_tools.files import DataFile
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
    map_seq_files,
    write_constants_fragment,
 )

 warnings.filterwarnings('ignore')

 matplotlib.use('agg')
 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # Turn the folder parameters into Path objects and open the raw run.
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'
 run_dc = RunDirectory(run_folder)
 # The receiver template is inserted as is, so instrument_src still holds a
 # placeholder that is filled with the module number per data aggregator later.
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 out_folder.mkdir(parents=True, exist_ok=True)

 print(f"Run is: {run}")
 print(f"Instrument H5File source: {instrument_src}")
 karabo_da = sorted(karabo_da)
 print(f"Process modules: {karabo_da}")

 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 print(f"Creation time: {creation_time}")

 # An empty control id means that the control devices share the detector's karabo_id.
 if karabo_id_control == "":
    karabo_id_control = karabo_id

 # ROIs are given in groups of 6 values; the 6th value of each group is the axis to
 # average over. With the default [-1] there is no 6th value and nothing is checked.
 if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
    print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
          f"Axis numbers given: {roi_definitions[5::6]}")
    sys.exit(1)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Access the detector control data of this run.
 ctrl_src = ctrl_source_template.format(karabo_id_control)
 ctrl_data = JungfrauCtrl(run_dc, ctrl_src)

 # A negative mem_cells requests the number of memory cells stored in the run,
 # any other value is used as given.
 if mem_cells < 0:
    memory_cells, sc_start = ctrl_data.get_memory_cells()

    mem_cells_name = "single cell" if memory_cells == 1 else "burst"
    print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
 else:
    memory_cells = mem_cells
    mem_cells_name = "single cell" if memory_cells == 1 else "burst"
    print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")

 # Unless the slow data is entered manually, the operating conditions read from
 # the run replace the values of the notebook parameters.
 if not manual_slow_data:
    integration_time = ctrl_data.get_integration_time()
    bias_voltage = ctrl_data.get_bias_voltage()
    gain_setting = ctrl_data.get_gain_setting()
    gain_mode = ctrl_data.get_gain_mode()

 print(f"Integration time is {integration_time} us")
 print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
 print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
 print(f"Bias voltage is {bias_voltage} V")
 print(f"Number of memory cells are {memory_cells}")
 ```

 %% Cell type:markdown id: tags:

 ### Retrieving calibration constants

 %% Cell type:code id: tags:

 ``` python
 # Describe the detector and its operating conditions for the constant lookup.
 jf_cal = JUNGFRAU_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    event_at=creation_time,
    modules=karabo_da,
    memory_cells=memory_cells,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
    client=rest_cfg.calibration_client(),
 )

 # Map each data aggregator name to the physical name of its detector unit.
 da_to_pdu = {}
 for mod_info in jf_cal.physical_detector_units.values():
    da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]

 # Dark constants are always requested, gain constants only for relative gain correction.
 constant_names = ["Offset10Hz", "BadPixelsDark10Hz"]
 if relative_gain:
    constant_names += ["BadPixelsFF10Hz", "RelativeGain10Hz"]

 jf_metadata = jf_cal.metadata(calibrations=constant_names)
 # Display retrieved calibration constants timestamps
 jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate the constants availability and raise/warn correspondingly.
 for mod in karabo_da[:]:
    calibrations = jf_metadata.get(mod, {})

    missing_dark_constants = {"Offset10Hz", "BadPixelsDark10Hz"} - set(calibrations)
    missing_gain_constants = {"BadPixelsFF10Hz", "RelativeGain10Hz"} - set(calibrations)

    if missing_dark_constants:
        warning(
            f"Dark constants {missing_dark_constants} are not available to correct {mod}."
            f" Module {mod} won't be corrected.")
        karabo_da.remove(mod)

    if relative_gain and missing_gain_constants:
        warning(f"Gain constants {missing_gain_constants} were not retrieved for {mod}."
                " No Relative gain correction for this module")
 if not karabo_da:  # Dark constants are missing for all modules.
    raise ValueError("Dark constants are missing for all modules.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=jf_metadata,
    caldb_root=jf_cal.caldb_root)


 # load constants arrays after storing fragment YAML file
 # and validating constants availability.
 const_data = jf_cal.ndarray_map(metadata=jf_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 def prepare_constants(module: str):
    """Bring the constants of one module into the layout used for correcting.

    Reads the notebook globals ``const_data`` and ``memory_cells``. The dark bad
    pixel array stored in ``const_data`` is modified in place when a flat-field
    bad pixel array is merged into it.

    :param module: The module name (karabo_da)
    :return: Tuple of
        offset_map (offset map),
        mask (mask of bad pixels),
        gain_map (map of relative gain factors, None if not available),
        module (name of module, as passed in).
    """
    arrays = const_data[module]
    burst_mode = memory_cells > 1

    offset_map = arrays["Offset10Hz"]
    mask = arrays["BadPixelsDark10Hz"]
    # The gain constants are optional.
    gain_map = arrays.get("RelativeGain10Hz")
    ff_mask = arrays.get("BadPixelsFF10Hz")

    # Merge the flat-field bad pixels (first two axes swapped) into the dark ones.
    if ff_mask is not None:
        mask |= np.moveaxis(ff_mask, 0, 1)

    if burst_mode:
        # move from x, y, cell, gain to cell, x, y, gain
        offset_map = np.moveaxis(offset_map, (0, 1), (1, 2))
        mask = np.moveaxis(mask, (0, 1), (1, 2))
        if gain_map is not None:
            gain_map = np.moveaxis(gain_map, (0, 2), (2, 0))
            # add extra empty cell constant: one more entry of ones along the first axis
            extra_cell = np.ones((1, *gain_map.shape[1:]))
            gain_map = np.concatenate((gain_map, extra_cell), axis=0)
    else:
        # Single cell: drop the axes of length one.
        offset_map = np.squeeze(offset_map)
        mask = np.squeeze(mask)
        if gain_map is not None:
            gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)

    # masking double size pixels
    mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
    mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE

    return offset_map, mask, gain_map, module

 # Prepare the constants of all modules in parallel.
 with multiprocessing.Pool() as pool:
    r = pool.map(prepare_constants, karabo_da)

 # Collect the prepared arrays per data aggregator name.
 constants = {}
 for offset_map, mask, gain_map, k_da in r:

    constants[k_da] = (offset_map, mask, gain_map)

 # The arrays as loaded are not needed anymore, drop the references to them.
 const_data.clear()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Read available sequence files to correct.
 mapped_files, num_seq_files = map_seq_files(
    run_folder, karabo_da, sequences)

 if not len(mapped_files):
    raise IndexError(
        "No sequence files available to correct for the selected sequences and karabo_da.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Processing a total of {num_seq_files} sequence files")
 table = []
 fi = 0  # running file number over all modules
 for kda, sfiles in mapped_files.items():
    for k, f in enumerate(sfiles):
        # The module name is only shown in the row of its first file.
        table.append((fi, kda if k == 0 else "", k, f))
        fi += 1
 md = display(Latex(tabulate.tabulate(
    table,
    tablefmt='latex',
    headers=["#", "module", "# module", "file"],
 )))
 ```

 %% Cell type:code id: tags:

 ``` python
 # The strixel helpers are only imported when the strixel layout is requested.
 if strixel_sensor:
    from cal_tools.jfstrixel import STRIXEL_SHAPE as strixel_frame_shape, double_pixel_indices, to_strixel
    # Indices of the double-size pixels, used to normalize them after the correction.
    Ydouble, Xdouble = double_pixel_indices()
    print('Strixel sensor transformation enabled')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Correct a chunk of images for offset and gain
 def correct_train(wid, index, d):
    """Correct one entry of the raw data for offset and, optionally, relative gain.

    The results are not returned but written at ``index`` into the notebook
    globals ``data_corr``, ``gain_corr`` and ``mask_corr``. The constants
    (``offset_map``, ``mask``, ``gain_map``) and the raw ``gain`` and ``memcells``
    arrays are read from notebook globals as well.

    :param wid: First argument passed by ``context.map``; not used here.
    :param index: Position of the entry in the mapped array and in the outputs.
    :param d: Raw ADC values of the entry, [cells, x, y].
    """
    adc = d.astype(np.float32)  # [cells, x, y]
    gain_bits = gain[index]

    # Store the gain first, so that the output keeps the original value 3 for low gain.
    if strixel_sensor:
        to_strixel(gain_bits, out=gain_corr[index, ...])
    else:
        gain_corr[index, ...] = gain_bits

    # Jungfrau gains 0[00], 1[01], 3[11]
    # Low gain becomes 2, so that the gain value can index the gain axis of the constants.
    gain_bits[gain_bits == 3] = 2

    burst_mode = memory_cells > 1
    if burst_mode:
        # Even though the memory cell pattern can be assumed to be the same across
        # all trains (for one correction run taken with one acquisition), the cells
        # are read per train to account for exceptions that can happen.
        cells = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # Such an image is corrected with the constant of cell 0, to avoid values of 0.
        # This relies on the modified memory cells not being stored in the corrected data.
        cells[cells == 255] = 0

        offset_per_gain = offset_map[cells, ...]  # [16 + empty cell, x, y]
        mask_per_gain = mask[cells, ...]
    else:
        offset_per_gain = offset_map
        mask_per_gain = mask

    # Offset correction: take for every pixel the offset of its gain stage.
    adc -= np.choose(gain_bits, np.moveaxis(offset_per_gain, -1, 0))

    # Gain correction
    if relative_gain and gain_map is not None:
        gain_per_gain = gain_map[cells, ...] if burst_mode else gain_map
        adc /= np.choose(gain_bits, np.moveaxis(gain_per_gain, -1, 0))

    # Bad pixel mask of the gain stage each pixel is in.
    bad_pixels = np.choose(gain_bits, np.moveaxis(mask_per_gain, -1, 0))

    if strixel_sensor:
        to_strixel(adc, out=data_corr[index, ...])
        data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
        to_strixel(bad_pixels, out=mask_corr[index, ...])
    else:
        data_corr[index, ...] = adc
        mask_corr[index, ...] = bad_pixels
 ```

 %% Cell type:code id: tags:

 ``` python
 # Timer for the duration of the processing steps.
 step_timer = StepTimer()

 # One pasha worker process per CPU; the context also allocates the arrays
 # that are filled by `correct_train()`.
 n_cpus = multiprocessing.cpu_count()
 context = psh.context.ProcessContext(num_workers=n_cpus)
 print(f"Using {n_cpus} workers for correction.")
 ```

 %% Cell type:code id: tags:

 ``` python
 def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
    """If ROIs are defined for this karabo_da, reduce them and save to the output file"""
    rois_defined = 0
    module_no = int(karabo_da[-2:])
    params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
    rois_source = f'{params_source}:output'
    if roi_definitions != [-1]:
        # Create Instrument and Control sections to later add datasets.
        outp_source = ofile.create_instrument_source(rois_source)
        ctrl_source = ofile.create_control_source(params_source)
    for i in range(len(roi_definitions) // 6):
        roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
        if roi_module == module_no:
            rois_defined += 1
+            # Set pixels below the threshold to 0 (but still used in the averaging)
+            roi_data = data_corr[..., a1:a2, b1:b2]
+            if roi_threshold > -1:
+                roi_data = roi_data * (roi_data > roi_threshold)
            # Apply the mask and average remaining pixels to 1D
-            roi_data = data_corr[..., a1:a2, b1:b2].mean(
+            roi_data = roi_data.mean(
                axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
            )

            # Add roi corrected datasets
            outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)

            # Add roi run control datasets.
            ctrl_source.create_run_key(f'roi{rois_defined}.region', np.array([[a1, a2, b1, b2]]))
            ctrl_source.create_run_key(f'roi{rois_defined}.reduce_axis', np.array([mean_axis]))
+            ctrl_source.create_run_key(f'roi{rois_defined}.threshold', np.array([roi_threshold], dtype=np.float32))

    if rois_defined:
        # Copy the index for the new source
        # Create count/first datasets at INDEX source.
        ofile.copy(f'INDEX/{karabo_id}/DET/{karabo_da}:daqOutput/data',
                   f'INDEX/{rois_source}/data')
        ntrains = ofile['INDEX/trainId'].shape[0]
        ctrl_source.create_index(ntrains)
 ```

 %% Cell type:markdown id: tags:

 ### Correcting RAW data ###

 %% Cell type:code id: tags:

 ``` python
 # Loop over modules
 empty_seq = 0
 corrected_files = []
 for local_karabo_da, mapped_files_module in mapped_files.items():
    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))

    for sequence_file in mapped_files_module:
        # Save corrected data in an output file with name
        # of corresponding raw sequence file.
        ofile_name = sequence_file.name.replace("RAW", "CORR")
        out_file = out_folder / ofile_name
        corrected_files.append(ofile_name)

        # Load sequence file data collection, data.adc keydata,
        # the shape for data to later created arrays of the same shape,
        # and number of available trains to correct.
        seq_dc = H5File(sequence_file)
        seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
        ishape = seq_dc_adc.shape  # input shape.
        corr_ntrains = ishape[0]  # number of available trains to correct.
        all_train_ids = seq_dc_adc.train_ids

        # Raise a WARNING if this sequence has no trains to correct.
        # Otherwise, print number of trains with no data.
        if corr_ntrains == 0:
            warning(f"No trains to correct for {sequence_file.name}: "
                 "Skipping the processing of this file.")
            empty_seq += 1
            continue
        elif len(all_train_ids) != corr_ntrains:
            print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
                  "trains with missing data.")

        # For testing, limit corrected trains. i.e. Getting output faster.
        if limit_trains > 0:
            print(f"\nCorrected trains are limited to: {limit_trains} trains")
            corr_ntrains = min(corr_ntrains, limit_trains)

        print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")

        # Load constants from the constants dictionary.
        # These arrays are used by `correct_train()` function
        offset_map, mask, gain_map = constants[local_karabo_da]

        # Determine total output shape.
        if strixel_sensor:
            oshape = (*ishape[:-2], *strixel_frame_shape)
        else:
            oshape = ishape

        # Allocate shared arrays for corrected data. Used in `correct_train()`
        data_corr = context.alloc(shape=oshape, dtype=np.float32)
        gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
        mask_corr = context.alloc(shape=oshape, dtype=np.uint32)

        step_timer.start()
        # Overwrite seq_dc after eliminating empty trains or/and applying limited images.
        seq_dc = seq_dc.select(
            instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])

        # Load raw images(adc), gain, memcells, and frame numbers.
        data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
        gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
        memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
        frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()

        # Validate that the selected cell id to preview is available in raw data.
        if memory_cells > 1:
            # For plotting, assuming that memory cells are sorted the same for all trains.
            found_cells = memcells[0] == cell_id_preview
            if any(found_cells):
                cell_idx_preview = np.where(found_cells)[0][0]
            else:
                print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
                      f"Previewing cell `{memcells[0]}`.")
                cell_idx_preview = 0
        else:
            cell_idx_preview = 0

        # Correct data per train
        context.map(correct_train, data)
        step_timer.done_step(f"Correction time.")

        step_timer.start()

        # Create CORR files and add corrected data sections.
        image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)

        with DataFile(out_file, 'w') as outp_file:
            # Create INDEX datasets.
            outp_file.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])

            # Create Instrument section to later add corrected datasets.
            outp_source = outp_file.create_instrument_source(instrument_src_kda)

            # Create count/first datasets at INDEX source.
            outp_source.create_index(data=image_counts)

            # RAW memoryCell and frameNumber are not corrected. But we are storing only
            # the values for the corrected trains.
            outp_source.create_key(
                "data.memoryCell", data=memcells,
                chunks=(min(chunks_ids, memcells.shape[0]), 1))
            outp_source.create_key(
                "data.frameNumber", data=frame_number,
                chunks=(min(chunks_ids, frame_number.shape[0]), 1))
            # Add main corrected `data.adc`` dataset and store corrected data.
            outp_source.create_key(
                "data.adc", data=data_corr,
                chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
            outp_source.create_compressed_key(
                "data.gain", data=gain_corr)
            outp_source.create_compressed_key(
                "data.mask", data=mask_corr)

            # Temporary hotfix for FXE assuming this dataset is in corrected files.
            outp_source.create_key(
                "data.trainId", data=seq_dc.train_ids,
                chunks=(min(50, len(seq_dc.train_ids))))

            save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)

            # Create METDATA datasets
            outp_file.create_metadata(
                like=seq_dc,
                sequence=seq_dc.run_metadata()["sequenceNumber"],
            )

        step_timer.done_step(f'Saving data time.')
 if empty_seq == sum([len(i) for i in mapped_files.values()]):
    warning("No valid trains for RAW data to correct.")
    sys.exit(0)
 ```

 %% Cell type:markdown id: tags:

 ### Processing time summary ###

 %% Cell type:code id: tags:

 ``` python
 # Report the total time, followed by the summary of the recorded steps.
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 # The corrected files are written at this point; everything below is only plotting.
 if skip_plots:
    print('Skipping plots')
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Geometry used by the preview plots; the first returned value is not needed.
 _, geom = init_jungfrau_geom(karabo_id=karabo_id, karabo_da=karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 # The previews only use the first corrected sequence.
 first_seq = 0 if sequences == [-1] else sequences[0]

 # `plot_trains = -1` is documented as "plot all trains". Used directly as a
 # slice end, -1 would silently drop the last train, hence the explicit full slice.
 preview_trains = np.s_[:] if plot_trains < 0 else np.s_[:plot_trains]

 corrected_files = [
    out_folder / f for f in fnmatch.filter(corrected_files, f"*{run}*S{first_seq:05d}*")
 ]
 with DataCollection.from_paths(corrected_files) as corr_dc:
    # Reading CORR data for plotting.
    jf_corr = components.JUNGFRAU(
        corr_dc,
        detector_name=karabo_id,
    ).select_trains(preview_trains)
    # First train that has data for all modules; `tid` selects the same train
    # from the raw data below.
    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))

 # Shape = [modules, trains, cells, x, y]
 # TODO: Fix the case if not all modules were requested to be corrected.
 # For example if only one modules was corrected. An assertion error is expected
 # at `geom.plot_data_fast`, while plotting corrected images.
 corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...].values
 corrected_train = jf_corr_data["data.adc"][
    :, cell_idx_preview, ...
 ].values  # lose the train axis.

 mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...].values
 mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...].values

 with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*", _use_voview=False) as raw_dc:

    # Reading RAW data for plotting.
    jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(
            preview_trains
    )

 raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...].values
 raw_train = (
    jf_raw.select_trains(by_id[[tid]])
    .get_array("data.adc")[:, 0, cell_idx_preview, ...]
    .values
 )

 gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...].values
 # Gain of all cells of the single train `tid`.
 gain_train_cells = (
    jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...].values
 )
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview

 %% Cell type:code id: tags:

 ``` python
 print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")

 fig, ax = plt.subplots(figsize=(18, 10))
 raw_mean = np.mean(raw, axis=1)
 geom.plot_data_fast(
    raw_mean,
    ax=ax,
    vmin=min(0.75*np.median(raw_mean[raw_mean > 0]), 2000),
    vmax=max(1.5*np.median(raw_mean[raw_mean > 0]), 16000),
    cmap="jet",
    colorbar={'shrink': 1, 'pad': 0.01},
 )
 ax.set_title(f'{karabo_id} - Mean RAW', size=18)
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview

 %% Cell type:code id: tags:

 ``` python
 print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")

 fig, ax = plt.subplots(figsize=(18, 10))
 corrected_mean = np.mean(corrected, axis=1)
 _corrected_vmin = min(0.75*np.median(corrected_mean[corrected_mean > 0]), -0.5)
 _corrected_vmax = max(2.*np.median(corrected_mean[corrected_mean > 0]), 100)

 mean_plot_kwargs = dict(
    vmin=_corrected_vmin, vmax=_corrected_vmax, cmap="jet"
 )

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_mean,
        ax=ax,
        colorbar={'shrink': 1, 'pad': 0.01},
        **mean_plot_kwargs
    )
 else:
    ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)

 ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Mean of the corrected preview data with bad pixels left out:
 # every pixel with a non-zero mask value is set to NaN so that
 # np.nanmean ignores it.
 fig, ax = plt.subplots(figsize=(18, 10))
 corrected_masked = corrected.copy()
 corrected_masked[mask != 0] = np.nan
 corrected_masked_mean = np.nanmean(corrected_masked, axis=1)
 del corrected_masked  # only the mean is needed from here on

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_masked_mean,
        ax=ax,
        colorbar={'shrink': 1, 'pad': 0.01},
        **mean_plot_kwargs
    )
 else:
    # Show the masked mean in the strixel case too, so that the
    # figure matches its "with mask" title.
    ax.imshow(corrected_masked_mean.squeeze(), aspect=10, **mean_plot_kwargs)

 ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown((f"#### A single image from train {tid}")))

 fig, ax = plt.subplots(figsize=(18, 10))

 single_plot_kwargs = dict(
    vmin=min(0.75 * np.median(corrected_train[corrected_train > 0]), -0.5),
    vmax=max(2.0 * np.median(corrected_train[corrected_train > 0]), 100),
    cmap="jet"
 )

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_train,
        ax=ax,
        colorbar={"shrink": 1, "pad": 0.01},
        **single_plot_kwargs
    )
 else:
    ax.imshow(corrected_train.squeeze(), aspect=10, **single_plot_kwargs)

 ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis, title):
    """Draw a 2D histogram as an image with a logarithmic colour scale.

    :param data: 2D array of counts, e.g. the histogram returned by
        `np.histogram2d`. Axis 0 is drawn along y, axis 1 along x.
    :param edges: Pair `(edges_axis0, edges_axis1)` of bin-edge arrays,
        used to set the extent of the image.
    :param y_axis: Label of the y axis.
    :param x_axis: Label of the x axis.
    :param title: Title of the plot.
    """
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    # imshow expects [left, right, bottom, top]
    extent = [
        np.min(edges[1]),
        np.max(edges[1]),
        np.min(edges[0]),
        np.max(edges[0]),
    ]

    # LogNorm rejects vmax < vmin. Clamp vmax to vmin so that an empty
    # histogram (all counts 0) gives a blank plot instead of an error.
    vmax = max(np.max(data), 1)

    im = ax.imshow(
        data[::-1, :],  # flip rows so the lowest bin of axis 0 is at the bottom
        extent=extent,
        aspect="auto",
        norm=LogNorm(vmin=1, vmax=vmax)
    )
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:markdown id: tags:

 ### Gain Bit Value

 %% Cell type:code id: tags:

 ``` python
 for i, mod in enumerate(karabo_da):
    pdu = da_to_pdu[mod]
    h, ex, ey = np.histogram2d(
        raw[i].flatten(),
        gain[i].flatten(),
        bins=[100, 4],
        range=[[0, 10000], [0, 4]],
    )
    do_2d_plot(
        h,
        (ex, ey),
        "Signal (ADU)",
        "Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
        f"Module {mod} ({pdu})",
    )
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 for i, mod in enumerate(karabo_da):
    pdu = da_to_pdu[mod]
    fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
    corrected_flatten = corrected[i].flatten()
    for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):
        h = ax.hist(
            corrected_flatten,
            bins=1000,
            range=hist_range,
            log=True,
        )
        l = ax.set_xlabel("Signal (keV)")
        l = ax.set_ylabel("Counts")
        _ = ax.set_title(f'Module {mod} ({pdu})')
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview

 %% Cell type:code id: tags:

 ``` python
 display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))

 fig, ax = plt.subplots(figsize=(18, 10))
 gain_max = np.max(gain_train_cells, axis=(1, 2))
 geom.plot_data_fast(
    gain_max,
    ax=ax,
    cmap="jet",
    colorbar={'shrink': 1, 'pad': 0.01},
 )
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append(
        (item.name, f"{item.value:016b}"))
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f"#### Bad pixels image for train {tid}"))

 fig, ax = plt.subplots(figsize=(18, 10))
 if not strixel_sensor:
    geom.plot_data_fast(
        np.log2(mask_train),
        ax=ax,
        vmin=0, vmax=32, cmap="jet",
        colorbar={'shrink': 1, 'pad': 0.01},
    )
 else:
    ax.imshow(np.log2(mask_train).squeeze(), vmin=0, vmax=32, cmap='jet', aspect=10)

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw"  # the folder to read data from, required
-out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/remove"  # the folder to output to, required
-run = 91  # run to process, required
+in_folder = "/gpfs/exfel/exp/FXE/202301/p003279/raw"  # the folder to read data from, required
+out_folder =  "/gpfs/exfel/data/scratch/kluyvert/jf-corr-p3279-r275"  # the folder to output to, required
+run = 275  # run to process, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed
 sequences_per_node = 1  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel

 # Parameters used to access raw data.
-karabo_id = "SPB_IRDA_JF4M"  # karabo prefix of Jungfrau devices
-karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08']  # data aggregators
+karabo_id = "FXE_XAD_JF500K"  # karabo prefix of Jungfrau devices
+karabo_da = ['JNGFR03']  # data aggregators
 receiver_template = "JNGFR{:02d}"  # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)
 karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id

 # Parameters for calibration database.
 cal_db_interface = "tcp://max-exfl-cal001:8017#8025" # the database interface to use
 cal_db_timeout = 180000  # timeout on caldb requests
 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"

 # Parameters affecting corrected data.
 relative_gain = True  # do relative gain correction.
 strixel_sensor = False  # reordering for strixel detector layout.
 strixel_double_norm = 2.0  # normalization to use for double-size pixels, only applied for strixel sensors.
 limit_trains = 0  # ONLY FOR TESTING. process only first N trains, Use 0 to process all.
 chunks_ids = 32  # HDF chunk size for memoryCell and frameNumber.
 chunks_data = 1  # HDF chunk size for pixel data in number of frames.

 # Parameters for retrieving calibration constants
 manual_slow_data = False  # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
 integration_time = 4.96  # integration time in us, will be overwritten by value in file
 gain_setting = 0  # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
 gain_mode = 0  # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by value in file, if manual_slow_data is set to True.
 mem_cells = -1  # Set mem_cells to -1 to automatically use the value stored in RAW data.
 bias_voltage = 180  # will be overwritten by value in file

 # Parameters for plotting
 skip_plots = False  # exit after writing corrected files
 plot_trains = 500  # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
 cell_id_preview = 15  # cell Id used for preview in single-shot plots

 # Parameters for ROI selection and reduction
 roi_definitions = [-1]  # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
+roi_threshold = 2.  # Corrected pixels at or below the threshold are set to 0 before ROI projections (they still count in the average). Set to -1 to include all pixels.

 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    # Thin wrapper: forwards all arguments unchanged to
    # xfel_calibrate.calibrate.balance_sequences and returns its result.
    # The import is local to this function.
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import fnmatch
 import multiprocessing
 import sys
 import warnings
 from logging import warning
 from pathlib import Path

 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pasha as psh
 import tabulate
 from IPython.display import Latex, Markdown, display
 from extra_data import DataCollection, H5File, RunDirectory, by_id, components
 from matplotlib.colors import LogNorm

 import cal_tools.restful_config as rest_cfg
 from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
 from cal_tools.jungfraulib import JungfrauCtrl
 from cal_tools.enums import BadPixels
 from cal_tools.jungfraulib import JungfrauCtrl
 from cal_tools.plotting import init_jungfrau_geom
 from cal_tools.files import DataFile
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
    map_seq_files,
    write_constants_fragment,
 )

 warnings.filterwarnings('ignore')

 matplotlib.use('agg')
 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'
 run_dc = RunDirectory(run_folder)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 out_folder.mkdir(parents=True, exist_ok=True)

 print(f"Run is: {run}")
 print(f"Instrument H5File source: {instrument_src}")
 karabo_da = sorted(karabo_da)
 print(f"Process modules: {karabo_da}")

 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 print(f"Creation time: {creation_time}")

 if karabo_id_control == "":
    karabo_id_control = karabo_id

 if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
    print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
          f"Axis numbers given: {roi_definitions[5::6]}")
    sys.exit(1)
 ```

 %% Cell type:code id: tags:

 ``` python
 ctrl_src = ctrl_source_template.format(karabo_id_control)
 ctrl_data = JungfrauCtrl(run_dc, ctrl_src)

 if mem_cells < 0:
    memory_cells, sc_start = ctrl_data.get_memory_cells()

    mem_cells_name = "single cell" if memory_cells == 1 else "burst"
    print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
 else:
    memory_cells = mem_cells
    mem_cells_name = "single cell" if memory_cells == 1 else "burst"
    print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")

 if not manual_slow_data:
    integration_time = ctrl_data.get_integration_time()
    bias_voltage = ctrl_data.get_bias_voltage()
    gain_setting = ctrl_data.get_gain_setting()
    gain_mode = ctrl_data.get_gain_mode()

 print(f"Integration time is {integration_time} us")
 print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
 print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
 print(f"Bias voltage is {bias_voltage} V")
 print(f"Number of memory cells are {memory_cells}")
 ```

 %% Cell type:markdown id: tags:

 ### Retrieving calibration constants

 %% Cell type:code id: tags:

 ``` python
 jf_cal = JUNGFRAU_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    event_at=creation_time,
    modules=karabo_da,
    memory_cells=memory_cells,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
    client=rest_cfg.calibration_client(),
 )

 da_to_pdu = {}
 for mod_info in jf_cal.physical_detector_units.values():
    da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]

 constant_names = ["Offset10Hz", "BadPixelsDark10Hz"]
 if relative_gain:
    constant_names += ["BadPixelsFF10Hz", "RelativeGain10Hz"]

 jf_metadata = jf_cal.metadata(calibrations=constant_names)
 # Display retrieved calibration constants timestamps
 jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate the constants availability and raise/warn correspondingly.
 for mod in karabo_da[:]:
    calibrations = jf_metadata.get(mod, {})

    missing_dark_constants = {"Offset10Hz", "BadPixelsDark10Hz"} - set(calibrations)
    missing_gain_constants = {"BadPixelsFF10Hz", "RelativeGain10Hz"} - set(calibrations)

    if missing_dark_constants:
        warning(
            f"Dark constants {missing_dark_constants} are not available to correct {mod}."
            f" Module {mod} won't be corrected.")
        karabo_da.remove(mod)

    if relative_gain and missing_gain_constants:
        warning(f"Gain constants {missing_gain_constants} were not retrieved for {mod}."
                " No Relative gain correction for this module")
 if not karabo_da:  # Dark constants are missing for all modules.
    raise ValueError("Dark constants are missing for all modules.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=jf_metadata,
    caldb_root=jf_cal.caldb_root)


 # load constants arrays after storing fragment YAML file
 # and validating constants availability.
 const_data = jf_cal.ndarray_map(metadata=jf_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 def prepare_constants(module: str):
    """Prepare constant arrays.

    Reads the constants of one module from the notebook-level `const_data`
    mapping and reorders their axes depending on the notebook-level
    `memory_cells` value.

    :param module: The module name (karabo_da)
    :return:
        offset_map (offset map),
        mask (mask of bad pixels),
        gain_map (map of relative gain factors, None if "RelativeGain10Hz"
            is not available for this module),
        module (name of module),
    """
    constant_arrays = const_data[module]
    offset_map = constant_arrays["Offset10Hz"]
    mask = constant_arrays["BadPixelsDark10Hz"]

    # Gain constants are optional: .get() yields None when they are missing.
    gain_map = constant_arrays.get("RelativeGain10Hz")
    mask_ff = constant_arrays.get("BadPixelsFF10Hz")

    # Combine masks
    # The flat-field mask has its first two axes swapped before it is
    # OR-ed (in place) into the dark mask.
    if mask_ff is not None:
        mask |= np.moveaxis(mask_ff, 0, 1)

    if memory_cells > 1:
        # move from x, y, cell, gain to cell, x, y, gain
        offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
        mask = np.moveaxis(mask, [0, 1], [1, 2])
    else:
        # Single cell: drop all axes of length 1.
        offset_map = np.squeeze(offset_map)
        mask = np.squeeze(mask)

    # masking double size pixels
    # Indices 255/256 on the third-from-last axis and
    # 255/256, 511/512, 767/768 on the second-from-last axis are flagged.
    mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
    mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE

    if gain_map is not None:
        if memory_cells > 1:
            # Swap axes 0 and 2 of the gain map.
            gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
            # add extra empty cell constant
            # (an all-ones entry appended along axis 0)
            b = np.ones(((1,)+gain_map.shape[1:]))
            gain_map = np.concatenate((gain_map, b), axis=0)
        else:
            gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)

    return offset_map, mask, gain_map, module

 with multiprocessing.Pool() as pool:
    r = pool.map(prepare_constants, karabo_da)

 # Print timestamps for the retrieved constants.
 constants = {}
 for offset_map, mask, gain_map, k_da in r:

    constants[k_da] = (offset_map, mask, gain_map)

 const_data.clear()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Read available sequence files to correct.
 mapped_files, num_seq_files = map_seq_files(
    run_folder, karabo_da, sequences)

 if not len(mapped_files):
    raise IndexError(
        "No sequence files available to correct for the selected sequences and karabo_da.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Processing a total of {num_seq_files} sequence files")
 table = []
 fi = 0
 for kda, sfiles in mapped_files.items():
    for k, f in enumerate(sfiles):
        if k == 0:
            table.append((fi, kda, k, f))
        else:
            table.append((fi, "", k,  f))
        fi += 1
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["#", "module", "# module", "file"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 if strixel_sensor:
    from cal_tools.jfstrixel import STRIXEL_SHAPE as strixel_frame_shape, double_pixel_indices, to_strixel
    Ydouble, Xdouble = double_pixel_indices()
    print('Strixel sensor transformation enabled')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Correct a chunk of images for offset and gain
 def correct_train(wid, index, d):
    """Apply offset and (optionally) relative gain correction to one entry.

    Besides its arguments, the function reads the notebook-level names
    `gain`, `memcells`, `offset_map`, `mask`, `gain_map`, `memory_cells`,
    `relative_gain`, `strixel_sensor` and `strixel_double_norm`, and
    writes its results into `data_corr`, `gain_corr` and `mask_corr`
    at position `index`. Nothing is returned.

    :param wid: Not used inside this function.
    :param index: Position of this entry in the input and output arrays.
    :param d: Raw data of this entry, [cells, x, y].
    """
    d = d.astype(np.float32)  # [cells, x, y]
    g = gain[index]

    # Copy gain over first to keep it at the original 3 for low gain.
    if strixel_sensor:
        to_strixel(g, out=gain_corr[index, ...])
    else:
        gain_corr[index, ...] = g

    # Jungfrau gains 0[00], 1[01], 3[11]
    # Change low gain to 2 for indexing purposes.
    # NOTE(review): if `gain` is a NumPy array, `g` is a view and this
    # also changes `gain[index]` itself - confirm that this is intended.
    g[g==3] = 2

    # Select memory cells
    if memory_cells > 1:
        """
        Even though it is correct to assume that memory cells pattern
        can be the same across all trains (for one correction run
        taken with one acquisition), it is preferred to not assume
        this to account for exceptions that can happen.
        """
        m = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # Corresponding image will be corrected with constant of cell 0. To avoid values of 0.
        # This line is depending on not storing the modified memory cells in the corrected data.
        m[m==255] = 0

        offset_map_cell = offset_map[m, ...]  # [16 + empty cell, x, y]
        mask_cell = mask[m, ...]
    else:
        offset_map_cell = offset_map
        mask_cell = mask

    # Offset correction
    # For every pixel, pick the offset of the gain stage given by `g`
    # (the gain axis of the constant is moved to the front for np.choose).
    offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))

    d -= offset

    # Gain correction
    # Skipped when relative gain is disabled or no gain map is available.
    if relative_gain and gain_map is not None:
        if memory_cells > 1:
            gain_map_cell = gain_map[m, ...]
        else:
            gain_map_cell = gain_map
        cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
        d /= cal

    # Bad pixel mask of the gain stage each pixel is in.
    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))

    if strixel_sensor:
        to_strixel(d, out=data_corr[index, ...])
        # Pixels at the (Ydouble, Xdouble) positions get an extra normalization.
        data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
        to_strixel(msk, out=mask_corr[index, ...])
    else:
        data_corr[index, ...] = d
        mask_corr[index, ...] = msk
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer = StepTimer()

 n_cpus = multiprocessing.cpu_count()
 context = psh.context.ProcessContext(num_workers=n_cpus)
 print(f"Using {n_cpus} workers for correction.")
 ```

 %% Cell type:code id: tags:

 ``` python
 def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
    """If ROIs are defined for this karabo_da, reduce them and save to the output file"""
    # Counts the ROIs that belong to this module; also used to number
    # the `roi<N>` keys written below.
    rois_defined = 0
    # The last two characters of the aggregator name give the module number.
    module_no = int(karabo_da[-2:])
    params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
    rois_source = f'{params_source}:output'
    if roi_definitions != [-1]:
        # Create Instrument and Control sections to later add datasets.
        outp_source = ofile.create_instrument_source(rois_source)
        ctrl_source = ofile.create_control_source(params_source)
    # roi_definitions is a flat list: each group of 6 values describes one ROI.
    for i in range(len(roi_definitions) // 6):
        roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
        if roi_module == module_no:
            rois_defined += 1
+            # Set pixels below the threshold to 0 (but still used in the averaging)
+            roi_data = data_corr[..., a1:a2, b1:b2]
+            if roi_threshold > -1:
+                roi_data = roi_data * (roi_data > roi_threshold)
            # Apply the mask and average remaining pixels to 1D
-            roi_data = data_corr[..., a1:a2, b1:b2].mean(
+            roi_data = roi_data.mean(
                axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
            )

            # Add roi corrected datasets
            outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)

            # Add roi run control datasets.
            ctrl_source.create_run_key(f'roi{rois_defined}.region', np.array([[a1, a2, b1, b2]]))
            ctrl_source.create_run_key(f'roi{rois_defined}.reduce_axis', np.array([mean_axis]))
+            ctrl_source.create_run_key(f'roi{rois_defined}.threshold', np.array([roi_threshold], dtype=np.float32))

    # The index is only written when at least one ROI matched this module.
    if rois_defined:
        # Copy the index for the new source
        # Create count/first datasets at INDEX source.
        ofile.copy(f'INDEX/{karabo_id}/DET/{karabo_da}:daqOutput/data',
                   f'INDEX/{rois_source}/data')
        ntrains = ofile['INDEX/trainId'].shape[0]
        ctrl_source.create_index(ntrains)
 ```

 %% Cell type:markdown id: tags:

 ### Correcting RAW data ###

 %% Cell type:code id: tags:

 ``` python
 # Loop over modules
 empty_seq = 0
 corrected_files = []
 for local_karabo_da, mapped_files_module in mapped_files.items():
    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))

    for sequence_file in mapped_files_module:
        # Save corrected data in an output file with name
        # of corresponding raw sequence file.
        ofile_name = sequence_file.name.replace("RAW", "CORR")
        out_file = out_folder / ofile_name
        corrected_files.append(ofile_name)

        # Load sequence file data collection, data.adc keydata,
        # the shape for data to later created arrays of the same shape,
        # and number of available trains to correct.
        seq_dc = H5File(sequence_file)
        seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
        ishape = seq_dc_adc.shape  # input shape.
        corr_ntrains = ishape[0]  # number of available trains to correct.
        all_train_ids = seq_dc_adc.train_ids

        # Raise a WARNING if this sequence has no trains to correct.
        # Otherwise, print number of trains with no data.
        if corr_ntrains == 0:
            warning(f"No trains to correct for {sequence_file.name}: "
                 "Skipping the processing of this file.")
            empty_seq += 1
            continue
        elif len(all_train_ids) != corr_ntrains:
            print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
                  "trains with missing data.")

        # For testing, limit corrected trains. i.e. Getting output faster.
        if limit_trains > 0:
            print(f"\nCorrected trains are limited to: {limit_trains} trains")
            corr_ntrains = min(corr_ntrains, limit_trains)

        print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")

        # Load constants from the constants dictionary.
        # These arrays are used by `correct_train()` function
        offset_map, mask, gain_map = constants[local_karabo_da]

        # Determine total output shape.
        if strixel_sensor:
            oshape = (*ishape[:-2], *strixel_frame_shape)
        else:
            oshape = ishape

        # Allocate shared arrays for corrected data. Used in `correct_train()`
        data_corr = context.alloc(shape=oshape, dtype=np.float32)
        gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
        mask_corr = context.alloc(shape=oshape, dtype=np.uint32)

        step_timer.start()
        # Overwrite seq_dc after eliminating empty trains or/and applying limited images.
        seq_dc = seq_dc.select(
            instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])

        # Load raw images(adc), gain, memcells, and frame numbers.
        data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
        gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
        memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
        frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()

        # Validate that the selected cell id to preview is available in raw data.
        if memory_cells > 1:
            # For plotting, assuming that memory cells are sorted the same for all trains.
            found_cells = memcells[0] == cell_id_preview
            if any(found_cells):
                cell_idx_preview = np.where(found_cells)[0][0]
            else:
                print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
                      f"Previewing cell `{memcells[0]}`.")
                cell_idx_preview = 0
        else:
            cell_idx_preview = 0

        # Correct data per train
        context.map(correct_train, data)
        step_timer.done_step(f"Correction time.")

        step_timer.start()

        # Create CORR files and add corrected data sections.
        image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)

        with DataFile(out_file, 'w') as outp_file:
            # Create INDEX datasets.
            outp_file.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])

            # Create Instrument section to later add corrected datasets.
            outp_source = outp_file.create_instrument_source(instrument_src_kda)

            # Create count/first datasets at INDEX source.
            outp_source.create_index(data=image_counts)

            # RAW memoryCell and frameNumber are not corrected. But we are storing only
            # the values for the corrected trains.
            outp_source.create_key(
                "data.memoryCell", data=memcells,
                chunks=(min(chunks_ids, memcells.shape[0]), 1))
            outp_source.create_key(
                "data.frameNumber", data=frame_number,
                chunks=(min(chunks_ids, frame_number.shape[0]), 1))
            # Add main corrected `data.adc`` dataset and store corrected data.
            outp_source.create_key(
                "data.adc", data=data_corr,
                chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
            outp_source.create_compressed_key(
                "data.gain", data=gain_corr)
            outp_source.create_compressed_key(
                "data.mask", data=mask_corr)

            # Temporary hotfix for FXE assuming this dataset is in corrected files.
            outp_source.create_key(
                "data.trainId", data=seq_dc.train_ids,
                chunks=(min(50, len(seq_dc.train_ids))))

            save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)

            # Create METADATA datasets
            outp_file.create_metadata(
                like=seq_dc,
                sequence=seq_dc.run_metadata()["sequenceNumber"],
            )

        step_timer.done_step(f'Saving data time.')
 if empty_seq == sum([len(i) for i in mapped_files.values()]):
    warning("No valid trains for RAW data to correct.")
    sys.exit(0)
 ```

 %% Cell type:markdown id: tags:

 ### Processing time summary ###

 %% Cell type:code id: tags:

 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 _, geom = init_jungfrau_geom(karabo_id=karabo_id, karabo_da=karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 first_seq = 0 if sequences == [-1] else sequences[0]

 corrected_files = [
    out_folder / f for f in fnmatch.filter(corrected_files, f"*{run}*S{first_seq:05d}*")
 ]
 with DataCollection.from_paths(corrected_files) as corr_dc:
    # Reading CORR data for plotting.
    jf_corr = components.JUNGFRAU(
        corr_dc,
        detector_name=karabo_id,
    ).select_trains(np.s_[:plot_trains])
    tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))

 # Shape = [modules, trains, cells, x, y]
 # TODO: Fix the case if not all modules were requested to be corrected.
 # For example if only one modules was corrected. An assertion error is expected
 # at `geom.plot_data_fast`, while plotting corrected images.
 corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...].values
 corrected_train = jf_corr_data["data.adc"][
    :, cell_idx_preview, ...
 ].values  # loose the train axis.

 mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...].values
 mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...].values

 with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*", _use_voview=False) as raw_dc:

    # Reading RAW data for plotting.
    jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(
            np.s_[:plot_trains]
    )

 raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...].values
 raw_train = (
    jf_raw.select_trains(by_id[[tid]])
    .get_array("data.adc")[:, 0, cell_idx_preview, ...]
    .values
 )

 gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...].values
 gain_train_cells = (
    jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...].values
 )
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview

 %% Cell type:code id: tags:

 ``` python
 print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")

 fig, ax = plt.subplots(figsize=(18, 10))
 raw_mean = np.mean(raw, axis=1)
 geom.plot_data_fast(
    raw_mean,
    ax=ax,
    vmin=min(0.75*np.median(raw_mean[raw_mean > 0]), 2000),
    vmax=max(1.5*np.median(raw_mean[raw_mean > 0]), 16000),
    cmap="jet",
    colorbar={'shrink': 1, 'pad': 0.01},
 )
 ax.set_title(f'{karabo_id} - Mean RAW', size=18)
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview

 %% Cell type:code id: tags:

 ``` python
 print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")

 corrected_mean = np.mean(corrected, axis=1)

 # Colour limits come from the median of the strictly positive pixels.
 # They are kept in `mean_plot_kwargs` so other cells can reuse them.
 _corrected_median = np.median(corrected_mean[corrected_mean > 0])
 _corrected_vmin = min(0.75 * _corrected_median, -0.5)
 _corrected_vmax = max(2. * _corrected_median, 100)
 mean_plot_kwargs = {
    "vmin": _corrected_vmin,
    "vmax": _corrected_vmax,
    "cmap": "jet",
 }

 fig, ax = plt.subplots(figsize=(18, 10))
 if strixel_sensor:
    # Strixel data is drawn as a plain image instead of through `geom`.
    ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
 else:
    geom.plot_data_fast(
        corrected_mean,
        ax=ax,
        colorbar={'shrink': 1, 'pad': 0.01},
        **mean_plot_kwargs
    )

 ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots(figsize=(18, 10))

 # Replace every pixel with a non-zero mask entry by NaN so that it does not
 # contribute to the per pixel mean; the temporary copy is dropped right away.
 corrected_masked = corrected.copy()
 corrected_masked[mask != 0] = np.nan
 corrected_masked_mean = np.nanmean(corrected_masked, axis=1)
 del corrected_masked

 if not strixel_sensor:
    geom.plot_data_fast(
        corrected_masked_mean,
        ax=ax,
        colorbar={'shrink': 1, 'pad': 0.01},
        **mean_plot_kwargs
    )
 else:
    # The strixel branch must show the masked mean as well; plotting
    # `corrected_mean` here would repeat the unmasked preview.
    ax.imshow(corrected_masked_mean.squeeze(), aspect=10, **mean_plot_kwargs)

 ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown((f"#### A single image from train {tid}")))

 # Colour limits follow the median of the strictly positive pixels of the
 # selected train.
 _train_median = np.median(corrected_train[corrected_train > 0])
 single_plot_kwargs = {
    "vmin": min(0.75 * _train_median, -0.5),
    "vmax": max(2.0 * _train_median, 100),
    "cmap": "jet",
 }

 fig, ax = plt.subplots(figsize=(18, 10))
 if strixel_sensor:
    # Strixel data is drawn as a plain image instead of through `geom`.
    ax.imshow(corrected_train.squeeze(), aspect=10, **single_plot_kwargs)
 else:
    geom.plot_data_fast(
        corrected_train,
        ax=ax,
        colorbar={"shrink": 1, "pad": 0.01},
        **single_plot_kwargs
    )

 ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis, title):
    """Draw a 2D histogram as a log-scaled image with a "Counts" colorbar.

    :param data: 2D array of bin counts; drawn with its first axis reversed.
    :param edges: Pair of bin-edge arrays. `edges[0]` gives the vertical
        extent of the image and `edges[1]` the horizontal extent.
    :param y_axis: Label of the vertical axis.
    :param x_axis: Label of the horizontal axis.
    :param title: Title of the plot.
    """
    vertical_edges, horizontal_edges = edges[0], edges[1]
    fig, ax = plt.subplots(figsize=(10, 10))

    image = ax.imshow(
        data[::-1],
        extent=[
            np.min(horizontal_edges),
            np.max(horizontal_edges),
            np.min(vertical_edges),
            np.max(vertical_edges),
        ],
        aspect="auto",
        # Logarithmic colour scale starting at one count.
        norm=LogNorm(vmin=1, vmax=np.max(data)),
    )
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)

    colorbar = fig.colorbar(image)
    colorbar.set_label("Counts")
 ```

 %% Cell type:markdown id: tags:

 ### Gain Bit Value

 %% Cell type:code id: tags:

 ``` python
 for mod_idx, mod in enumerate(karabo_da):
    pdu = da_to_pdu[mod]
    # Signal in 100 bins over [0, 10000] against the gain value in 4 bins
    # over [0, 4].
    counts, signal_edges, gain_edges = np.histogram2d(
        raw[mod_idx].flatten(),
        gain[mod_idx].flatten(),
        bins=[100, 4],
        range=[[0, 10000], [0, 4]],
    )
    do_2d_plot(
        counts,
        (signal_edges, gain_edges),
        "Signal (ADU)",
        "Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
        f"Module {mod} ({pdu})",
    )
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 # One histogram per range: a zoomed view and a wide view of the signal.
 _hist_ranges = [(-100, 1000), (-1000, 10000)]

 for i, mod in enumerate(karabo_da):
    pdu = da_to_pdu[mod]
    corrected_flatten = corrected[i].flatten()
    fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
    for ax, hist_range in zip(axs, _hist_ranges):
        ax.hist(
            corrected_flatten,
            bins=1000,
            range=hist_range,
            log=True,
        )
        ax.set_xlabel("Signal (keV)")
        ax.set_ylabel("Counts")
        ax.set_title(f'Module {mod} ({pdu})')
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview

 %% Cell type:code id: tags:

 ``` python
 display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))

 # Reduce the gain data over axes 1 and 2 to get one maximum per pixel.
 gain_max = np.max(gain_train_cells, axis=(1, 2))

 fig, ax = plt.subplots(figsize=(18, 10))
 _gain_plot_kwargs = {
    "cmap": "jet",
    "colorbar": {'shrink': 1, 'pad': 0.01},
 }
 geom.plot_data_fast(gain_max, ax=ax, **_gain_plot_kwargs)
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 # One row per bad-pixel type: its name and its bit pattern. The pattern is
 # zero-padded to 32 digits because mask entries are encoded in 32 bits; a
 # 16 digit padding leaves the higher flags wider than the other rows.
 table = [(item.name, f"{item.value:032b}") for item in BadPixels]
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f"#### Bad pixels image for train {tid}"))

 # Plot the base-2 logarithm of the mask on a fixed 0..32 colour scale.
 _mask_log2 = np.log2(mask_train)

 fig, ax = plt.subplots(figsize=(18, 10))
 if strixel_sensor:
    ax.imshow(_mask_log2.squeeze(), vmin=0, vmax=32, cmap='jet', aspect=10)
 else:
    geom.plot_data_fast(
        _mask_log2,
        ax=ax,
        vmin=0, vmax=32, cmap="jet",
        colorbar={'shrink': 1, 'pad': 0.01},
    )

 plt.show()
 ```

--- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Jungfrau Dark Image Characterization #

 Author: European XFEL Detector Group, Version: 2.0

 Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps

 %% Cell type:code id: tags:

 ``` python
 in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/'  # folder under which runs are located, required
 out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # path to place reports at, required
 metadata_folder = ''  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 run_high = 141 # run number for G0 dark run, required
 run_med = 142 # run number for G1 dark run, required
 run_low = 143 # run number for G2 dark run, required

 # Parameters used to access raw data.
 karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules
 karabo_id = 'SPB_IRDA_JF4M'  # karabo_id (detector identifier) prefix of Jungfrau detector to process.
 karabo_id_control = ''  # if control is on a different ID, set to empty string if it is the same a karabo-id
 receiver_template = 'JNGFR{:02}' # inset for receiver devices
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for instrument source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)

 # Parameters for calibration database and storing constants.
 use_dir_creation_date = True  # use dir creation date
 cal_db_interface = 'tcp://max-exfl-cal001:8016#8045'  # calibrate db interface to connect to
 cal_db_timeout = 300000 # timeout on caldb requests
 local_output = True  # output constants locally
 db_output = False  # output constants to database

 # Parameters affecting creating dark calibration constants.
 badpixel_threshold_sigma = 5.  # bad pixels defined by values outside n times this std from median
 offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values
 offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values
 max_trains = 1000  # Maximum trains to process darks. Set to 0 to process all available train images. 1000 trains is enough resolution to create the dark constants
 min_trains = 100  # Minimum number of trains to process dark constants. Raise a warning if the run has fewer trains.
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 time_limits = 0.025  # to find calibration constants later on, the integration time is allowed to vary by 0.5 us

 # Parameters to be used for injecting dark calibration constants.
 integration_time = 1000 # integration time in us, will be overwritten by value in file
 gain_setting = 0  # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixgain1, fixgain2. Will be overwritten by value in file
 gain_mode = 0  # 1 if medium and low runs are  fixgain1 and fixgain2, otherwise 0. It will be overwritten by value in file, if manual_slow_data
 bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file
 memory_cells = 16  # number of memory cells

 # Parameters used for plotting
 detailed_report = False

 # TODO: this is used for only Warning check at AGIPD dark.
 # Need to rethink if it makes sense to use it here as well.
 operation_mode = 'ADAPTIVE_GAIN'  # Detector operation mode, optional
 ```

 %% Cell type:code id: tags:

 ``` python
 import os
 import warnings
 from logging import warning
 warnings.filterwarnings('ignore')

 import matplotlib
 import matplotlib.pyplot as plt
 import multiprocessing
 import numpy as np
 import pasha as psh
 import yaml
 from IPython.display import Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')
 %matplotlib inline

 from XFELDetAna.plotting.heatmap import heatmapPlot
 from XFELDetAna.plotting.histogram import histPlot
 from cal_tools import jungfraulib, step_timing
 from cal_tools.enums import BadPixels, JungfrauGainMode
 from cal_tools.tools import (
    get_dir_creation_date,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    save_const_to_h5,
    send_to_db,
 )
 from iCalibrationDB import Conditions, Constants
 ```

 %% Cell type:code id: tags:

 ``` python
 # Constants relevant for the analysis
 run_nums = [run_high, run_med, run_low]  # run number for G0/HG0, G1, G2
 sensor_size = (1024, 512)
 gains = [0, 1, 2]

 # Gain mode values expected for the medium and low gain runs.
 fixed_settings = [
    JungfrauGainMode.FIX_GAIN_1.value, JungfrauGainMode.FIX_GAIN_2.value]
 dynamic_settings = [
    JungfrauGainMode.FORCE_SWITCH_HG1.value, JungfrauGainMode.FORCE_SWITCH_HG2.value]
 old_fixed_settings = ["fixgain1", "fixgain2"]

 # The creation time stays None unless it is taken from the directory of
 # the high gain run.
 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
    print(f"Using {creation_time} as creation time")
 os.makedirs(out_folder, exist_ok=True)

 # `cal_db_interface` is replaced by the value returned from
 # get_random_db_interface().
 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 # An empty control ID means the control device shares the detector's karabo_id.
 if karabo_id_control == "":
    karabo_id_control = karabo_id
 ```

 %% Cell type:code id: tags:

 ``` python
 # The proposal is the second-to-last non-empty path component of `in_folder`.
 _folder_parts = [part for part in in_folder.strip('/').split('/') if part]
 proposal = _folder_parts[-2]
 file_loc = f"proposal:{proposal} runs:{run_high} {run_med} {run_low}"

 report = get_report(metadata_folder)

 step_timer = step_timing.StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Reading control data

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 gain_runs = dict()

 med_low_settings = []

 ctrl_src = ctrl_source_template.format(karabo_id_control)

+run_nums = jungfraulib.sort_runs_by_gain(
+    raw_folder=in_folder,
+    runs=run_nums,
+    ctrl_src=ctrl_src,
+    )
+
 for gain, run_n in enumerate(run_nums):
    run_dc = RunDirectory(f"{in_folder}/r{run_n:04d}/")
    gain_runs[run_n] = [gain, run_dc]
    ctrl_data = jungfraulib.JungfrauCtrl(run_dc, ctrl_src)
    # Read control data for the high gain run only.
-    if run_n == run_high:
+    if gain == 0:

        run_mcells, sc_start = ctrl_data.get_memory_cells()

        if not manual_slow_data:
            integration_time = ctrl_data.get_integration_time()
            bias_voltage = ctrl_data.get_bias_voltage()
            gain_setting = ctrl_data.get_gain_setting()
            print(f"Gain setting is {gain_setting} ({ctrl_data.run_settings})")
            print(f"Integration time is {integration_time} us")
            print(f"Bias voltage is {bias_voltage} V")
        if run_mcells == 1:
            memory_cells = 1
            print('Dark runs in single cell mode, '
                  f'storage cell start: {sc_start:02d}')
        else:
            memory_cells = 16
            print('Dark runs in burst mode, '
                  f'storage cell start: {sc_start:02d}')
    else:
        gain_mode = ctrl_data.get_gain_mode()
        med_low_settings.append(ctrl_data.run_mode)

-# A transperent workaround for old raw data with wrong/missing medium and low settings
-if med_low_settings == [None, None]:
-    warning("run.settings is not stored in the data to read. "
-            f"Hence assuming gain_mode = {gain_mode} for adaptive old data.")
-elif med_low_settings == ["dynamicgain", "forceswitchg1"]:
-    warning(f"run.settings for medium and low gain runs are wrong {med_low_settings}. "
-            f"This is an expected bug for old raw data. Setting gain_mode to {gain_mode}.")
-# Validate that low_med_settings is not a mix of adaptive and fixed settings.
-elif not (sorted(med_low_settings) in [fixed_settings, dynamic_settings, old_fixed_settings]):  # noqa
-    raise ValueError(
-        "Medium and low run settings are not as expected. "
-        f"Either {dynamic_settings}, {fixed_settings}, or {old_fixed_settings} are expected.\n"
-        f"Got {sorted(med_low_settings)} for both runs, respectively.")
-
 print(f"Gain mode is {gain_mode} ({med_low_settings})")

 step_timer.done_step(f'Reading control data.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 condition = Conditions.Dark.jungfrau(
    memory_cells=memory_cells,
    bias_voltage=bias_voltage,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
 )

 # Look up the modules in the calibration database for this condition.
 # NOTE(review): later cells zip `db_modules` with `karabo_da`, so the result
 # is presumably one entry per aggregator in the same order - confirm.
 db_modules = get_pdu_from_db(
    karabo_id=karabo_id,
    karabo_da=karabo_da,
    constant=Constants.jungfrau.Offset(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Start retrieving existing constants for comparison
 # One (module, constant name) pair per retrieval task; the constant name
 # varies slowest, the module fastest.
 mod_x_const = [(mod, const) for const in ["Offset", "Noise", "BadPixelsDark"] for mod in karabo_da]

 from cal_tools.tools import get_from_db
 from datetime import timedelta

 def retrieve_old_constant(mod, const):
    """Retrieve a previously stored constant for one module.

    The query time is `creation_time` minus 60 seconds, or None when no
    creation time is set.

    :param mod: Data aggregator name passed to the database as `karabo_da`.
    :param const: Name of the constant class in `Constants.jungfrau`.
    :return: Tuple (data, timestamp, filepath, h5path). When data or
        metadata is missing, the timestamp is "Not found" and both paths
        are None.
    """
    query_time = creation_time-timedelta(seconds=60) if creation_time else None

    data, mdata = get_from_db(
        karabo_id=karabo_id,
        karabo_da=mod,
        constant=getattr(Constants.jungfrau, const)(),
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=query_time,
        strategy="pdu_prior_in_time",
        verbosity=1,
        timeout=cal_db_timeout
    )

    if mdata is None or data is None:
        return data, "Not found", None, None

    ccv = mdata.calibration_constant_version
    timestamp = ccv.begin_at.isoformat()
    filepath = os.path.join(ccv.hdf5path, ccv.filename)
    return data, timestamp, filepath, ccv.h5path


 # Retrieve the old constants asynchronously in a process pool; the result
 # object `old_retrieval_res` is waited on in a later cell.
 old_retrieval_pool = multiprocessing.Pool()
 old_retrieval_res = old_retrieval_pool.starmap_async(
    retrieve_old_constant, mod_x_const
 )
 # No further tasks are submitted to the pool.
 old_retrieval_pool.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Use only high gain threshold for all gains in case of fixed_gain.
 _abs_limits = (offset_abs_threshold_low, offset_abs_threshold_high)

 if gain_mode:  # fixed_gain
    # Repeat the first (high gain) entry of each limit list three times.
    offset_abs_threshold = [[limits[0]]*3 for limits in _abs_limits]
 else:
    offset_abs_threshold = list(_abs_limits)
 ```

 %% Cell type:code id: tags:

 ``` python
 # pasha thread context with one worker per memory cell; used to allocate the
 # constant arrays and to map the per-cell processing.
 context = psh.context.ThreadContext(num_workers=memory_cells)
 ```

 %% Cell type:code id: tags:

 ``` python
 """
 All jungfrau runs are taken through one acquisition, except for the forceswitch runs.
 While taking non-fixed dark runs, a procedure of multiple acquisitions is used to switch the storage cell indices.

 This is done for medium and low gain dark dynamic runs, only [forceswitchg1, forceswitchg2]:
 Switching the cell indices in burst mode is a work around for hardware procedure
 deficiency that produces wrong data for dark runs except for the first storage cell.
 This is why multiple acquisitions are taken to switch the used storage cells and
 acquire data through two cells for each of the 16 cells instead of acquiring darks through all 16 cells.
 """

 print(f"Maximum trains to process is set to {max_trains}")

 # Resulting constants, keyed by data aggregator name.
 noise_map = dict()
 offset_map = dict()
 bad_pixels_map = dict()

 for mod in karabo_da:
    step_timer.start()
    # The receiver number is parsed from the last two characters of the
    # aggregator name.
    instrument_src = instrument_source_template.format(
        karabo_id, receiver_template.format(int(mod[-2:])))

    print(f"\n- Instrument data path for {mod} is {instrument_src}.")

    # All three constants share the shape sensor_size + (memory_cells, 3).
    offset_map[mod] = context.alloc(
        shape=(sensor_size+(memory_cells, 3)), fill=0, dtype=np.float32)
    noise_map[mod] = context.alloc(like=offset_map[mod], fill=0)
    bad_pixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32, fill=0)

    for run_n, [gain, run_dc] in gain_runs.items():

        def process_cell(worker_id, array_index, cell_number):
            """Fill offset, noise and wrong-gain flags for one memory cell.

            Called through `context.map` once per memory cell. Reads
            `images`, `acelltable`, `gain_vals`, `raw_g`, `mod` and `gain`
            from the enclosing loop and writes into `offset_map`,
            `noise_map` and `bad_pixels_map`.

            :param worker_id: Unused.
            :param array_index: Unused.
            :param cell_number: Memory cell to process.
            """
            cell_slice_idx = acelltable == cell_number
            thiscell = images[..., cell_slice_idx]  # [1024, 512, n_trains]

            # Identify cells/trains with images of 0 pixels.
            # TODO: An investigation is ongoing by DET to identify reason for these empty images.
            nonzero_adc = np.any(thiscell != 0 , axis=(0, 1))  # [n_trains]

            # Exclude empty images with 0 pixels, before calculating offset and noise
            thiscell = thiscell[..., nonzero_adc]
            offset_map[mod][..., cell_number, gain] = np.mean(  # [1024, 512]
                thiscell, axis=2, dtype=np.float32)
            noise_map[mod][..., cell_number, gain] = np.std(  # [1024, 512]
                thiscell, axis=2, dtype=np.float32)
            del thiscell
            # Check if there are wrong bad gain values.
            # 1. Exclude empty images.
            # 2. Indicate pixels with wrong gain value for any train for each cell.
            # TODO: mean is used to use thresholds for accepting gain values, even if not 0 mean value.
            gain_avg = np.mean(  # [1024, 512]
                gain_vals[..., cell_slice_idx][..., nonzero_adc],
                axis=2, dtype=np.float32
            )

            # [1024, 512]
            # A pixel is flagged when its averaged gain value differs from
            # the value expected for this gain stage.
            bad_pixels_map[mod][..., cell_number, gain][gain_avg != raw_g] |= BadPixels.WRONG_GAIN_VALUE.value

        print(f"Gain stage {gain}, run {run_n}")

        # load shape of data for memory cells, and detector size (imgs, cells, x, y)
        n_trains = run_dc[instrument_src, "data.adc"].shape[0]
        # load number of data available, including trains with empty data.
        all_trains = len(run_dc.train_ids)
        instr_dc = run_dc.select(instrument_src, require_all=True)
        empty_trains = all_trains - n_trains
        if empty_trains != 0:
            print(f"{mod} has {empty_trains} empty trains out of {all_trains} trains")
        # max_trains <= 0 means that all available trains are processed.
        if max_trains > 0:
            n_trains = min(n_trains, max_trains)
        print(f"Processing {n_trains} images.")

        if n_trains == 0:
            raise ValueError(f"{run_n} has no trains to process.")

        # Too few trains only produce a warning; processing continues.
        if n_trains < min_trains:
            warning(f"Less than {min_trains} trains are available in RAW data.")

        # Select only requested number of images to process darks.
        instr_dc = instr_dc.select_trains(np.s_[:n_trains])
        # The axis order is reversed so that trains become the last axis.
        images = np.transpose(
            instr_dc[instrument_src, "data.adc"].ndarray(), (3, 2, 1, 0))
        acelltable = np.transpose(instr_dc[instrument_src, "data.memoryCell"].ndarray())
        gain_vals = np.transpose(
            instr_dc[instrument_src, "data.gain"].ndarray(), (3, 2, 1, 0))

        # define gain value as saved in raw gain map
        raw_g = 3 if gain == 2 else gain

        # In single cell mode the cell IDs are shifted by the storage cell start.
        if memory_cells == 1:
            acelltable -= sc_start
        # Only for dynamic medium and low gain runs [forceswitchg1, forceswitchg2] in burst mode.

        if gain_mode == 0 and gain > 0 and memory_cells == 16:
            # 255 similar to the receiver which uses the 255
            # value to indicate a cell without an image.
            # image shape for forceswitchg1 and forceswitchg2 = (1024, 512, 2, trains)
            # compared to expected shape of (1024, 512, 16, trains) for high gain run.
            acelltable[1:] = 255

        # Calculate offset and noise maps
        context.map(process_cell, range(memory_cells))
        # Free the raw arrays of this run before the next one is loaded.
        del images
        del acelltable
        del gain_vals
    step_timer.done_step(f'Creating Offset and noise constants for a module.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # The per-cell offset and noise plots are produced only for detailed reports.
 if detailed_report:
    display(Markdown("## Offset and Noise Maps:"))
    display(Markdown(
        "Below offset and noise maps for the high ($g_0$) gain stage are shown, "
        "alongside the distribution of these values. One expects block-like "
        "structures mapping to the ASICs of the detector"))
    # Per gain stage: display name, offset plotting range and noise plotting range.
    g_name = ['G0', 'G1', 'G2']
    g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
    n_range = [(0., 50.), (0., 50.), (0., 50.)]

    unit = '[ADCu]'
    # TODO: Fix plots arrangement and speed for Jungfrau burst mode.
    step_timer.start()
    for pdu, mod in zip(db_modules, karabo_da):
        for g_idx in gains:
            for cell in range(0, memory_cells):
                # Offset heat map; the first two axes are swapped for display.
                f_o0 = heatmapPlot(
                    np.swapaxes(offset_map[mod][..., cell, g_idx], 0, 1),
                    y_label="Row",
                    x_label="Column",
                    lut_label=unit,
                    aspect=1.,
                    vmin=g_range[g_idx][0],
                    vmax=g_range[g_idx][1],
                    title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})')

                # Offset distribution on a logarithmic count axis.
                fo0, ax_o0 = plt.subplots()
                res_o0 = histPlot(
                    ax_o0, offset_map[mod][..., cell, g_idx],
                    bins=800,
                    range=g_range[g_idx],
                    facecolor='b',
                    histotype='stepfilled',
                )

                ax_o0.tick_params(axis='both',which='major',labelsize=15)
                ax_o0.set_title(
                    f'Module pedestal distribution - Cell {cell:02d} - Module {mod} ({pdu})',
                    fontsize=15)
                ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
                ax_o0.set_yscale('log')

                # Noise heat map; the first two axes are swapped for display.
                f_n0 = heatmapPlot(
                    np.swapaxes(noise_map[mod][..., cell, g_idx], 0, 1),
                    y_label="Row",
                    x_label="Column",
                    lut_label= unit,
                    aspect=1.,
                    vmin=n_range[g_idx][0],
                    vmax=n_range[g_idx][1],
                    title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})",
                )

                # Noise distribution.
                fn0, ax_n0 = plt.subplots()
                res_n0 = histPlot(
                    ax_n0,
                    noise_map[mod][..., cell, g_idx],
                    bins=100,
                    range=n_range[g_idx],
                    facecolor='b',
                    histotype='stepfilled',
                )

                ax_n0.tick_params(axis='both', which='major', labelsize=15)
                ax_n0.set_title(
                    f'Module noise distribution - Cell {cell:02d} - Module {mod} ({pdu})',
                    fontsize=15)
                ax_n0.set_xlabel(
                    f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
                plt.show()
    step_timer.done_step(f'Plotting offset and noise maps.')
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixel Map ##

 The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:

 $$
 v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
 $$
 or
 $$
 v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
 $$

 Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:

 %% Cell type:code id: tags:

 ``` python
 def print_bp_entry(bp):
    """Print one bad-pixel entry: name, 32 digit bit pattern and integer value.

    :param bp: Object with `name` and `value` attributes, e.g. a
        `BadPixels` member.
    """
    bit_pattern = f"{bp.value:032b}"
    print(f"{bp.name:<30s} {bit_pattern} -> {int(bp.value)}")

 print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
 print_bp_entry(BadPixels.WRONG_GAIN_VALUE)

 def eval_bpidx(d):
    """Flag values lying outside median +/- n*std of their cell and gain.

    The median and standard deviation are taken over the first two axes of
    `d`, ignoring NaNs; n is `badpixel_threshold_sigma`.

    :param d: 4D array of constant values.
    :return: Boolean array shaped like `d`, True where a value is outside.
    """
    centre = np.nanmedian(d, axis=(0, 1))[None, None, :, :]
    spread = np.nanstd(d, axis=(0, 1))[None, None, :, :]

    upper_bound = badpixel_threshold_sigma*spread+centre
    lower_bound = (-badpixel_threshold_sigma)*spread+centre

    return (d > upper_bound) | (d < lower_bound)
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 for pdu, mod in zip(db_modules, karabo_da):
    display(Markdown(f"### Badpixels for module {mod} ({pdu}):"))
    offset_abs_threshold = np.array(offset_abs_threshold)
    # Absolute offset limits, broadcast along the last axis of the maps.
    lower_limits = offset_abs_threshold[0][None, None, None, :]
    upper_limits = offset_abs_threshold[1][None, None, None, :]

    mod_offset = offset_map[mod]
    mod_noise = noise_map[mod]
    mod_bad_pixels = bad_pixels_map[mod]

    # Offset: statistical outliers and non-finite values.
    mod_bad_pixels[eval_bpidx(mod_offset)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    mod_bad_pixels[~np.isfinite(mod_offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # Noise: statistical outliers and non-finite values.
    mod_bad_pixels[eval_bpidx(mod_noise)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    mod_bad_pixels[~np.isfinite(mod_noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # Offset outside of the absolute limits.
    outside_abs_limits = (mod_offset < lower_limits) | (mod_offset > upper_limits)
    mod_bad_pixels[outside_abs_limits] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value

    if not detailed_report:
        continue

    # One bad pixel heat map per gain stage and memory cell.
    for g_idx in gains:
        for cell in range(memory_cells):
            bad_pixels = bad_pixels_map[mod][:, :, cell, g_idx]
            fn_0 = heatmapPlot(
                np.swapaxes(bad_pixels, 0, 1),
                y_label="Row",
                x_label="Column",
                lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
                aspect=1.,
                vmin=0, vmax=5,
                title=f'G{g_idx} Bad pixel map - Cell {cell:02d} - Module {mod} ({pdu})')
 step_timer.done_step('Creating bad pixels constant')
 ```

 %% Cell type:markdown id: tags:

 ## Inject and save calibration constants

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 for mod, db_mod in zip(karabo_da, db_modules):
    # The first two axes of each map are swapped before storing.
    constants = {
        'Offset': np.moveaxis(offset_map[mod], 0, 1),
        'Noise': np.moveaxis(noise_map[mod], 0, 1),
        'BadPixelsDark': np.moveaxis(bad_pixels_map[mod], 0, 1),
    }

    # Metadata of the last constant stored for this module; it stays None
    # when neither output is enabled.
    md = None

    for key, const_data in constants.items():

        const =  getattr(Constants.jungfrau, key)()
        const.data = const_data

        # Allow the integration time to deviate by `time_limits` in both
        # directions when the constant is looked up later.
        for parm in condition.parameters:
            if parm.name == "Integration Time":
                parm.lower_deviation = time_limits
                parm.upper_deviation = time_limits

        if db_output:
            md = send_to_db(
                db_module=db_mod,
                karabo_id=karabo_id,
                constant=const,
                condition=condition,
                file_loc=file_loc,
                report_path=report,
                cal_db_interface=cal_db_interface,
                creation_time=creation_time,
                timeout=cal_db_timeout,
            )
        # With both outputs enabled, `md` ends up holding the result of the
        # local save.
        if local_output:
            md = save_const_to_h5(
                db_module=db_mod,
                karabo_id=karabo_id,
                constant=const,
                condition=condition,
                data=const.data,
                file_loc=file_loc,
                report=report,
                creation_time=creation_time,
                out_folder=out_folder,
            )
            print(f"Calibration constant {key} is stored locally at {out_folder}.\n")

 print("Constants parameter conditions are:\n")
 # The creation time is taken from the last returned metadata if there is
 # any, otherwise from `creation_time`.
 print(
    f"• Bias voltage: {bias_voltage}\n"
    f"• Memory cells: {memory_cells}\n"
    f"• Integration time: {integration_time}\n"
    f"• Gain setting: {gain_setting}\n"
    f"• Gain mode: {gain_mode}\n"
    f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")  # noqa
 step_timer.done_step("Injecting constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Print the overall time span followed by the per-step summary.
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 # now we need the old constants
 old_const, old_mdata = {}, {}

 # Block until the asynchronous retrieval has finished, then pair each
 # result with the (module, constant) task that produced it.
 old_retrieval_res.wait()
 _retrieved = old_retrieval_res.get()

 for (mod, const), (data, timestamp, filepath, h5path) in zip(mod_x_const, _retrieved):
    old_const.setdefault(mod, {})[const] = data
    old_mdata.setdefault(mod, {})[const] = dict(
        timestamp=timestamp,
        filepath=filepath,
        h5path=h5path,
    )
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown("## The following pre-existing constants are used for comparison:"))

 for mod, consts in old_mdata.items():
    pdu = db_modules[karabo_da.index(mod)]
    display(Markdown(f"- {mod} ({pdu})"))
    for const, const_mdata in consts.items():
        display(Markdown(f"    - {const} at {const_mdata['timestamp']}"))

    # saving locations of old constants for summary notebook
    module_summary = {
        "module": mod,
        "pdu": pdu,
        "old-constants": consts,
    }
    summary_path = f"{metadata_folder or out_folder}/module_metadata_{mod}.yml"
    with open(summary_path, "w") as fd:
        yaml.safe_dump(module_summary, fd)
 ```

 %% Cell type:markdown id: tags:

 # Jungfrau Dark Image Characterization #

 Author: European XFEL Detector Group, Version: 2.0

 Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps

 %% Cell type:code id: tags:

 ``` python
 in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/'  # folder under which runs are located, required
 out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # path to place reports at, required
 metadata_folder = ''  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 run_high = 141 # run number for G0 dark run, required
 run_med = 142 # run number for G1 dark run, required
 run_low = 143 # run number for G2 dark run, required

 # Parameters used to access raw data.
 karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules
 karabo_id = 'SPB_IRDA_JF4M'  # karabo_id (detector identifier) prefix of Jungfrau detector to process.
 karabo_id_control = ''  # if control is on a different ID, set to empty string if it is the same a karabo-id
 receiver_template = 'JNGFR{:02}' # inset for receiver devices
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for instrument source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)

 # Parameters for calibration database and storing constants.
 use_dir_creation_date = True  # use dir creation date
 cal_db_interface = 'tcp://max-exfl-cal001:8016#8045'  # calibrate db interface to connect to
 cal_db_timeout = 300000 # timeout on caldb requests
 local_output = True  # output constants locally
 db_output = False  # output constants to database

 # Parameters affecting creating dark calibration constants.
 badpixel_threshold_sigma = 5.  # bad pixels defined by values outside n times this std from median
 offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values
 offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values
 max_trains = 1000  # Maximum trains to process darks. Set to 0 to process all available train images. 1000 trains is enough resolution to create the dark constants
 min_trains = 100  # Minimum number of trains to process dark constants. Raise a warning if the run has fewer trains.
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 time_limits = 0.025  # to find calibration constants later on, the integration time is allowed to vary by 0.5 us

 # Parameters to be used for injecting dark calibration constants.
 integration_time = 1000 # integration time in us, will be overwritten by value in file
 gain_setting = 0  # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixgain1, fixgain2. Will be overwritten by value in file
 gain_mode = 0  # 1 if medium and low runs are  fixgain1 and fixgain2, otherwise 0. It will be overwritten by value in file, if manual_slow_data
 bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file
 memory_cells = 16  # number of memory cells

 # Parameters used for plotting
 detailed_report = False

 # TODO: this is used for only Warning check at AGIPD dark.
 # Need to rethink if it makes sense to use it here as well.
 operation_mode = 'ADAPTIVE_GAIN'  # Detector operation mode, optional
 ```

 %% Cell type:code id: tags:

 ``` python
 import os
 import warnings
 from logging import warning
 warnings.filterwarnings('ignore')

 import matplotlib
 import matplotlib.pyplot as plt
 import multiprocessing
 import numpy as np
 import pasha as psh
 import yaml
 from IPython.display import Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')
 %matplotlib inline

 from XFELDetAna.plotting.heatmap import heatmapPlot
 from XFELDetAna.plotting.histogram import histPlot
 from cal_tools import jungfraulib, step_timing
 from cal_tools.enums import BadPixels, JungfrauGainMode
 from cal_tools.tools import (
    get_dir_creation_date,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    save_const_to_h5,
    send_to_db,
 )
 from iCalibrationDB import Conditions, Constants
 ```

 %% Cell type:code id: tags:

 ``` python
 # Constants relevant for the analysis
 run_nums = [run_high, run_med, run_low]  # run number for G0/HG0, G1, G2
 sensor_size = (1024, 512)
 gains = [0, 1, 2]

 fixed_settings = [
    JungfrauGainMode.FIX_GAIN_1.value, JungfrauGainMode.FIX_GAIN_2.value]
 dynamic_settings = [
    JungfrauGainMode.FORCE_SWITCH_HG1.value, JungfrauGainMode.FORCE_SWITCH_HG2.value]
 old_fixed_settings = ["fixgain1", "fixgain2"]

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
    print(f"Using {creation_time} as creation time")
 os.makedirs(out_folder, exist_ok=True)

 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 if karabo_id_control == "":
    karabo_id_control = karabo_id
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = f"proposal:{proposal} runs:{run_high} {run_med} {run_low}"

 report = get_report(metadata_folder)

 step_timer = step_timing.StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Reading control data

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 gain_runs = dict()

 med_low_settings = []

 ctrl_src = ctrl_source_template.format(karabo_id_control)

+run_nums = jungfraulib.sort_runs_by_gain(
+    raw_folder=in_folder,
+    runs=run_nums,
+    ctrl_src=ctrl_src,
+    )
+
 for gain, run_n in enumerate(run_nums):
    run_dc = RunDirectory(f"{in_folder}/r{run_n:04d}/")
    gain_runs[run_n] = [gain, run_dc]
    ctrl_data = jungfraulib.JungfrauCtrl(run_dc, ctrl_src)
    # Read control data for the high gain run only.
-    if run_n == run_high:
+    if gain == 0:

        run_mcells, sc_start = ctrl_data.get_memory_cells()

        if not manual_slow_data:
            integration_time = ctrl_data.get_integration_time()
            bias_voltage = ctrl_data.get_bias_voltage()
            gain_setting = ctrl_data.get_gain_setting()
            print(f"Gain setting is {gain_setting} ({ctrl_data.run_settings})")
            print(f"Integration time is {integration_time} us")
            print(f"Bias voltage is {bias_voltage} V")
        if run_mcells == 1:
            memory_cells = 1
            print('Dark runs in single cell mode, '
                  f'storage cell start: {sc_start:02d}')
        else:
            memory_cells = 16
            print('Dark runs in burst mode, '
                  f'storage cell start: {sc_start:02d}')
    else:
        gain_mode = ctrl_data.get_gain_mode()
        med_low_settings.append(ctrl_data.run_mode)

-# A transperent workaround for old raw data with wrong/missing medium and low settings
-if med_low_settings == [None, None]:
-    warning("run.settings is not stored in the data to read. "
-            f"Hence assuming gain_mode = {gain_mode} for adaptive old data.")
-elif med_low_settings == ["dynamicgain", "forceswitchg1"]:
-    warning(f"run.settings for medium and low gain runs are wrong {med_low_settings}. "
-            f"This is an expected bug for old raw data. Setting gain_mode to {gain_mode}.")
-# Validate that low_med_settings is not a mix of adaptive and fixed settings.
-elif not (sorted(med_low_settings) in [fixed_settings, dynamic_settings, old_fixed_settings]):  # noqa
-    raise ValueError(
-        "Medium and low run settings are not as expected. "
-        f"Either {dynamic_settings}, {fixed_settings}, or {old_fixed_settings} are expected.\n"
-        f"Got {sorted(med_low_settings)} for both runs, respectively.")
-
 print(f"Gain mode is {gain_mode} ({med_low_settings})")

 step_timer.done_step(f'Reading control data.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 condition = Conditions.Dark.jungfrau(
    memory_cells=memory_cells,
    bias_voltage=bias_voltage,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
 )

 db_modules = get_pdu_from_db(
    karabo_id=karabo_id,
    karabo_da=karabo_da,
    constant=Constants.jungfrau.Offset(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Start retrieving existing constants for comparison
 mod_x_const = [(mod, const) for const in ["Offset", "Noise", "BadPixelsDark"] for mod in karabo_da]

 from cal_tools.tools import get_from_db
 from datetime import timedelta

 def retrieve_old_constant(mod, const):
    """Look up a pre-existing constant of type `const` for module `mod`.

    The constant class is resolved by name on `Constants.jungfrau` and
    queried through `get_from_db` with the notebook-level `condition`,
    using the "pdu_prior_in_time" strategy. When `creation_time` is set,
    the query is made for 60 seconds before it.

    Returns a tuple `(data, timestamp, filepath, h5path)`. If either the
    data or its metadata is missing, the timestamp is the string
    "Not found" and both paths are None.
    """
    constant_obj = getattr(Constants.jungfrau, const)()

    # Query slightly before the creation time used for the new constants.
    if creation_time:
        query_time = creation_time - timedelta(seconds=60)
    else:
        query_time = None

    data, mdata = get_from_db(
        karabo_id=karabo_id,
        karabo_da=mod,
        constant=constant_obj,
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=query_time,
        strategy="pdu_prior_in_time",
        verbosity=1,
        timeout=cal_db_timeout,
    )

    if data is None or mdata is None:
        return data, "Not found", None, None

    ccv = mdata.calibration_constant_version
    found_at = ccv.begin_at.isoformat()
    file_location = os.path.join(ccv.hdf5path, ccv.filename)
    return data, found_at, file_location, ccv.h5path


 old_retrieval_pool = multiprocessing.Pool()
 old_retrieval_res = old_retrieval_pool.starmap_async(
    retrieve_old_constant, mod_x_const
 )
 old_retrieval_pool.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Use only high gain threshold for all gains in case of fixed_gain.

 if gain_mode:  # fixed_gain
    offset_abs_threshold = [[offset_abs_threshold_low[0]]*3, [offset_abs_threshold_high[0]]*3]
 else:
    offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]
 ```

 %% Cell type:code id: tags:

 ``` python
 context = psh.context.ThreadContext(num_workers=memory_cells)
 ```

 %% Cell type:code id: tags:

 ``` python
 """
 All jungfrau runs are taken through one acquisition, except for the forceswitch runs.
 While taking non-fixed dark runs, a procedure of multiple acquisitions is used to switch the storage cell indices.

 This is done for medium and low gain dark dynamic runs, only [forceswitchg1, forceswitchg2]:
 Switching the cell indices in burst mode is a work around for hardware procedure
 deficiency that produces wrong data for dark runs except for the first storage cell.
 This is why multiple acquisitions are taken to switch the used storage cells and
 acquire data through two cells for each of the 16 cells instead of acquiring darks through all 16 cells.
 """

 print(f"Maximum trains to process is set to {max_trains}")

 noise_map = dict()
 offset_map = dict()
 bad_pixels_map = dict()

 for mod in karabo_da:
    step_timer.start()
    instrument_src = instrument_source_template.format(
        karabo_id, receiver_template.format(int(mod[-2:])))

    print(f"\n- Instrument data path for {mod} is {instrument_src}.")

    offset_map[mod] = context.alloc(
        shape=(sensor_size+(memory_cells, 3)), fill=0, dtype=np.float32)
    noise_map[mod] = context.alloc(like=offset_map[mod], fill=0)
    bad_pixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32, fill=0)

    for run_n, [gain, run_dc] in gain_runs.items():

        def process_cell(worker_id, array_index, cell_number):
            """Compute offset, noise and wrong-gain flags for one memory cell.

            Works on the `images`, `acelltable`, `gain_vals` and `raw_g`
            variables of the enclosing run loop and writes the results for
            the current `mod` and `gain` into `offset_map`, `noise_map`
            and `bad_pixels_map`. `worker_id` and `array_index` are unused.
            """
            in_cell = acelltable == cell_number
            cell_adc = images[..., in_cell]  # [1024, 512, n_trains]

            # Flag the images of this cell that are not entirely 0.
            # TODO: An investigation is ongoing by DET to identify reason for these empty images.
            has_signal = np.any(cell_adc != 0, axis=(0, 1))  # [n_trains]

            # Offset and noise are evaluated without the all-zero images.
            cell_adc = cell_adc[..., has_signal]
            target = (..., cell_number, gain)
            offset_map[mod][target] = np.mean(  # [1024, 512]
                cell_adc, axis=2, dtype=np.float32)
            noise_map[mod][target] = np.std(  # [1024, 512]
                cell_adc, axis=2, dtype=np.float32)
            del cell_adc

            # Average the gain values over the same non-empty images and
            # mark every pixel whose average differs from the expected
            # raw gain value.
            # TODO: mean is used to use thresholds for accepting gain values, even if not 0 mean value.
            cell_gain = gain_vals[..., in_cell][..., has_signal]
            gain_avg = np.mean(cell_gain, axis=2, dtype=np.float32)  # [1024, 512]

            wrong_gain = gain_avg != raw_g  # [1024, 512]
            bad_pixels_map[mod][target][wrong_gain] |= BadPixels.WRONG_GAIN_VALUE.value

        print(f"Gain stage {gain}, run {run_n}")

        # load shape of data for memory cells, and detector size (imgs, cells, x, y)
        n_trains = run_dc[instrument_src, "data.adc"].shape[0]
        # load number of data available, including trains with empty data.
        all_trains = len(run_dc.train_ids)
        instr_dc = run_dc.select(instrument_src, require_all=True)
        empty_trains = all_trains - n_trains
        if empty_trains != 0:
            print(f"{mod} has {empty_trains} empty trains out of {all_trains} trains")
        if max_trains > 0:
            n_trains = min(n_trains, max_trains)
        print(f"Processing {n_trains} images.")

        if n_trains == 0:
            raise ValueError(f"{run_n} has no trains to process.")

        if n_trains < min_trains:
            warning(f"Less than {min_trains} trains are available in RAW data.")

        # Select only requested number of images to process darks.
        instr_dc = instr_dc.select_trains(np.s_[:n_trains])
        images = np.transpose(
            instr_dc[instrument_src, "data.adc"].ndarray(), (3, 2, 1, 0))
        acelltable = np.transpose(instr_dc[instrument_src, "data.memoryCell"].ndarray())
        gain_vals = np.transpose(
            instr_dc[instrument_src, "data.gain"].ndarray(), (3, 2, 1, 0))

        # define gain value as saved in raw gain map
        raw_g = 3 if gain == 2 else gain

        if memory_cells == 1:
            acelltable -= sc_start
        # Only for dynamic medium and low gain runs [forceswitchg1, forceswitchg2] in burst mode.

        if gain_mode == 0 and gain > 0 and memory_cells == 16:
            # 255 similar to the receiver which uses the 255
            # value to indicate a cell without an image.
            # image shape for forceswitchg1 and forceswitchg2 = (1024, 512, 2, trains)
            # compared to expected shape of (1024, 512, 16, trains) for high gain run.
            acelltable[1:] = 255

        # Calculate offset and noise maps
        context.map(process_cell, range(memory_cells))
        del images
        del acelltable
        del gain_vals
    step_timer.done_step(f'Creating Offset and noise constants for a module.')
 ```

 %% Cell type:code id: tags:

 ``` python
 if detailed_report:
    display(Markdown("## Offset and Noise Maps:"))
    display(Markdown(
        "Below offset and noise maps for the high ($g_0$) gain stage are shown, "
        "alongside the distribution of these values. One expects block-like "
        "structures mapping to the ASICs of the detector"))
    g_name = ['G0', 'G1', 'G2']
    g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
    n_range = [(0., 50.), (0., 50.), (0., 50.)]

    unit = '[ADCu]'
    # TODO: Fix plots arrangement and speed for Jungfrau burst mode.
    step_timer.start()
    for pdu, mod in zip(db_modules, karabo_da):
        for g_idx in gains:
            for cell in range(0, memory_cells):
                f_o0 = heatmapPlot(
                    np.swapaxes(offset_map[mod][..., cell, g_idx], 0, 1),
                    y_label="Row",
                    x_label="Column",
                    lut_label=unit,
                    aspect=1.,
                    vmin=g_range[g_idx][0],
                    vmax=g_range[g_idx][1],
                    title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})')

                fo0, ax_o0 = plt.subplots()
                res_o0 = histPlot(
                    ax_o0, offset_map[mod][..., cell, g_idx],
                    bins=800,
                    range=g_range[g_idx],
                    facecolor='b',
                    histotype='stepfilled',
                )

                ax_o0.tick_params(axis='both',which='major',labelsize=15)
                ax_o0.set_title(
                    f'Module pedestal distribution - Cell {cell:02d} - Module {mod} ({pdu})',
                    fontsize=15)
                ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
                ax_o0.set_yscale('log')

                f_n0 = heatmapPlot(
                    np.swapaxes(noise_map[mod][..., cell, g_idx], 0, 1),
                    y_label="Row",
                    x_label="Column",
                    lut_label= unit,
                    aspect=1.,
                    vmin=n_range[g_idx][0],
                    vmax=n_range[g_idx][1],
                    title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})",
                )

                fn0, ax_n0 = plt.subplots()
                res_n0 = histPlot(
                    ax_n0,
                    noise_map[mod][..., cell, g_idx],
                    bins=100,
                    range=n_range[g_idx],
                    facecolor='b',
                    histotype='stepfilled',
                )

                ax_n0.tick_params(axis='both', which='major', labelsize=15)
                ax_n0.set_title(
                    f'Module noise distribution - Cell {cell:02d} - Module {mod} ({pdu})',
                    fontsize=15)
                ax_n0.set_xlabel(
                    f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
                plt.show()
    step_timer.done_step(f'Plotting offset and noise maps.')
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixel Map ###

 The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:

 $$
 v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
 $$
 or
 $$
 v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
 $$

 Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:

 %% Cell type:code id: tags:

 ``` python
 def print_bp_entry(bp):
    """Print one bad pixel entry as: name, 32-bit binary value, integer value.

    `bp` needs a `name` and a `value` attribute; the name is left-aligned
    in a 30 character wide column.
    """
    print(f"{bp.name:<30s} {bp.value:032b} -> {int(bp.value)}")

 print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
 print_bp_entry(BadPixels.WRONG_GAIN_VALUE)

 def eval_bpidx(d):
    """Return a boolean mask of the values in `d` that are outliers.

    Median and standard deviation (both ignoring NaN) are taken over the
    first two axes of `d`. A value is flagged when it lies more than
    `badpixel_threshold_sigma` standard deviations above or below the
    median of its slice along the remaining two axes.
    """
    centre = np.nanmedian(d, axis=(0, 1))[np.newaxis, np.newaxis, :, :]
    spread = np.nanstd(d, axis=(0, 1))[np.newaxis, np.newaxis, :, :]
    margin = badpixel_threshold_sigma * spread

    too_high = d > margin + centre
    too_low = d < centre - margin
    return too_high | too_low
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 for pdu, mod in zip(db_modules, karabo_da):
    display(Markdown(f"### Badpixels for module {mod} ({pdu}):"))
    offset_abs_threshold = np.array(offset_abs_threshold)

    bad_pixels_map[mod][eval_bpidx(offset_map[mod])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value

    bad_pixels_map[mod][~np.isfinite(offset_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    bad_pixels_map[mod][eval_bpidx(noise_map[mod])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value

    bad_pixels_map[mod][~np.isfinite(noise_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    bad_pixels_map[mod][(offset_map[mod] < offset_abs_threshold[0][None, None, None, :]) | (offset_map[mod] > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value  # noqa

    if detailed_report:

        for g_idx in gains:
            for cell in range(memory_cells):
                bad_pixels = bad_pixels_map[mod][:, :, cell, g_idx]
                fn_0 = heatmapPlot(
                    np.swapaxes(bad_pixels, 0, 1),
                    y_label="Row",
                    x_label="Column",
                    lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
                    aspect=1.,
                    vmin=0, vmax=5,
                    title=f'G{g_idx} Bad pixel map - Cell {cell:02d} - Module {mod} ({pdu})')
 step_timer.done_step(f'Creating bad pixels constant')
 ```

 %% Cell type:markdown id: tags:

 ## Inject and save calibration constants

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 for mod, db_mod in zip(karabo_da, db_modules):
    constants = {
        'Offset': np.moveaxis(offset_map[mod], 0, 1),
        'Noise': np.moveaxis(noise_map[mod], 0, 1),
        'BadPixelsDark': np.moveaxis(bad_pixels_map[mod], 0, 1),
    }

    md = None

    for key, const_data in constants.items():

        const =  getattr(Constants.jungfrau, key)()
        const.data = const_data

        for parm in condition.parameters:
            if parm.name == "Integration Time":
                parm.lower_deviation = time_limits
                parm.upper_deviation = time_limits

        if db_output:
            md = send_to_db(
                db_module=db_mod,
                karabo_id=karabo_id,
                constant=const,
                condition=condition,
                file_loc=file_loc,
                report_path=report,
                cal_db_interface=cal_db_interface,
                creation_time=creation_time,
                timeout=cal_db_timeout,
            )
        if local_output:
            md = save_const_to_h5(
                db_module=db_mod,
                karabo_id=karabo_id,
                constant=const,
                condition=condition,
                data=const.data,
                file_loc=file_loc,
                report=report,
                creation_time=creation_time,
                out_folder=out_folder,
            )
            print(f"Calibration constant {key} is stored locally at {out_folder}.\n")

 print("Constants parameter conditions are:\n")
 print(
    f"• Bias voltage: {bias_voltage}\n"
    f"• Memory cells: {memory_cells}\n"
    f"• Integration time: {integration_time}\n"
    f"• Gain setting: {gain_setting}\n"
    f"• Gain mode: {gain_mode}\n"
    f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")  # noqa
 step_timer.done_step("Injecting constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 # now we need the old constants
 old_const = {}
 old_mdata = {}
 old_retrieval_res.wait()

 for (mod, const), (data, timestamp, filepath, h5path) in zip(
    mod_x_const, old_retrieval_res.get()):
    old_const.setdefault(mod, {})[const] = data
    old_mdata.setdefault(mod, {})[const] = {
        "timestamp": timestamp,
        "filepath": filepath,
        "h5path": h5path,
    }
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown("## The following pre-existing constants are used for comparison:"))

 for mod, consts in old_mdata.items():
    pdu = db_modules[karabo_da.index(mod)]
    display(Markdown(f"- {mod} ({pdu})"))
    for const in consts:
        display(Markdown(f"    - {const} at {consts[const]['timestamp']}"))
    # saving locations of old constants for summary notebook
    with open(f"{metadata_folder or out_folder}/module_metadata_{mod}.yml", "w") as fd:
        yaml.safe_dump(
            {
                "module": mod,
                "pdu": pdu,
                "old-constants": old_mdata[mod],
            },
            fd,
        )
 ```

--- a/notebooks/REMI/REMI_Digitize_and_Transform.ipynb
+++ b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb
 %% Cell type:code id: tags:

 ``` python
 # Data selection parameters.
 run = 104  # Run ID.
 in_folder = '/gpfs/exfel/exp/SQS/202101/p002535/raw'  # Partial input path appended with run ID.
 out_folder = '/gpfs/exfel/exp/SQS/202101/p002535/scratch/cal_test'  # Full path to output folder.

 calib_config_path = '/gpfs/exfel/exp/SQS/202101/p002535/usr/config_board2+4.yaml'  # Path to correction and transform configuration

 # These parameters are required by xfel-calibrate but ignored in this notebook.
 cycle = ''  # Proposal cycle, currently not used.
 cal_db_timeout = 0  # Calibration DB timeout, currently not used.
 cal_db_interface = 'foo'  # Calibration DB interface, currently not used.
 karabo_da = 'bar'  # Karabo data aggregator name, currently not used

 # Output parameters.
 karabo_id = 'SQS_REMI_DLD6'  # Karabo device ID root for virtual output device.
 proposal = ''  # Proposal, leave empty for auto detection based on in_folder
 out_aggregator = 'REMI01'  # Aggregator name for output files.
 out_seq_len = 5000  # Number of trains per sequence file in output.
 det_device_id = '{karabo_id}/DET/{det_name}'  # Karabo device ID for virtual output device.
 det_output_key = 'output'  # Pipeline name for fast data output.
 save_raw_triggers = True  # Whether to save trigger position in files.
 save_raw_edges = True  # Whether to save digitized edge positions in files.
 save_raw_amplitudes = True  # Whether to save analog pulse amplitudes in files.
 save_rec_signals = True  # Whether to save reconstructed signals (u1-w2, mcp) in files.
 save_rec_hits = True  # Whether to save reconstructed hits (x,y,t,m) in files.
 chunks_triggers = [500]  # HDF chunk size for triggers.
 chunks_edges = [500, 7, 50]  # HDF chunk size for edges.
 chunks_amplitudes = [500, 7, 50]  # HDF chunk size for amplitudes.
 chunks_hits = [50, 50]  # HDF chunk size for hits.
 chunks_signals = [50, 50]  # HDF chunk size for signals.
 dataset_compression = 'gzip'  # HDF compression method.
 dataset_compression_opts = 3  # HDF GZIP compression level.

 # Detector parameters.
 quad_anode = False  # Reconstruction assumes a hex anode by default, change for quad anodes.

 # Trigger parameters.
 ppt_source = 'SQS_RR_UTC/TSYS/TIMESERVER:outputBunchPattern'
 ignore_fel = False  # Ignore any FEL entries in the PPT.
 ignore_ppl = False  # Ignore any PPL entries in the PPT.
 ppl_offset = 0  # In units of the PPT.
 laser_ppt_mask = -1  # Bit mask for used laser, negative to auto-detect from instrument.
 instrument_sase = 3  # Which SASE we're running at for PPT decoding.
 first_pulse_offset = 10000  # Sample position where the first pulse begins, ignored when PPT is reconstructed.
 single_pulse_length = 25000  # How many samples if there's only one pulse.
 pulse_start_offset = 0  # Signal offset at the start of each pulse.
 pulse_end_offset = 0  # Signal offset at the end of each pulse.

 # PPT reconstruction parameters.
 reconstruct_ppt = False  # Reconstruct PPT from some trigger edges.
 trigger_edge_channel = '4_D'  # Channel to use for triggering.
 trigger_edge_offset = 0  # Offset to apply to the first trigger edge position to compute first pulse offset.
 fake_ppt_offset = 0  # Offset in reconstructed PPT for pulses.

 # Parallelization parameters.
 mp_find_triggers = 0.5  # Parallelization for finding triggers.
 mp_find_edges = 0.5  # Parallelization for digitizing analog signal.
 mt_avg_trace = 2  # Parallelization for trace averaging.
 mp_rec_hits = 1.0  # Parallelization for hit reconstruction.
 ```

 %% Cell type:code id: tags:

 ``` python
 from datetime import datetime
 from logging import warning
 from pathlib import Path
 import re

 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.colors import LogNorm
 from threadpoolctl import threadpool_limits

 import h5py

 import pasha as psh
 from euxfel_bunch_pattern import indices_at_sase, indices_at_laser
 from extra_data import RunDirectory, by_id
 from extra_remi import Analysis, trigger_dt
 from extra_remi.util import timing
 from extra_remi.rd_resort import signal_dt, hit_dt
 from extra_remi.files import DataFile, sequence_pulses

 if quad_anode:
    from extra_remi.plots import plot_detector_diagnostics_quad as plot_detector_diagnostics
 else:
    from extra_remi.plots import plot_detector_diagnostics_hex as plot_detector_diagnostics

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 def finite_flattened_slice(array, slice_=np.s_[:]):
    """Select `array[slice_]` and return its finite entries as a flat array.

    NaN and infinite values are dropped; by default the whole array is used.
    """
    selection = array[slice_]
    finite_mask = np.isfinite(selection)
    return selection[finite_mask]
 ```

 %% Cell type:code id: tags:

 ``` python
 calib_config_path = Path(calib_config_path)

 if not calib_config_path.is_file():
    # If the path cannot be resolved right now, try the same path relative to in_folder.
    calib_config_path = Path(in_folder) / calib_config_path

    if not calib_config_path.is_file():
        # Disallow implicit config file creation.
        raise ValueError('calib_config_path not found - neither absolute nor relative to in_folder')

 remi = Analysis(calib_config_path, use_hex=not quad_anode)

 with timing('open_run'):
    dc = remi.prepare_dc(RunDirectory(Path(in_folder) / f'r{run:04d}', inc_suspect_trains=True),
                         require_ppt=not reconstruct_ppt)
 ```

 %% Cell type:markdown id: tags:

 # Transformation parameters

 %% Cell type:code id: tags:

 ``` python
 def print_leaf(leaf, indent=0):
    """Print a nested dict as an indented tree.

    Entries whose value is a dict are printed as a bare key followed by
    their own entries one level deeper; all other entries are printed as
    `key: value`. Each level is indented by four spaces.
    """
    pad = ' ' * (4 * indent)

    for name, entry in leaf.items():
        if not isinstance(entry, dict):
            print(f'{pad}{name}: {entry}')
            continue

        print(pad + name)
        print_leaf(entry, indent=indent + 1)

 print_leaf(remi.tree)
 ```

 %% Cell type:markdown id: tags:

 # Pulse and trigger information

 %% Cell type:markdown id: tags:

 ### Read PPT from file or reconstruct PPT for older data

 %% Cell type:code id: tags:

 ``` python
 if reconstruct_ppt:
    # Take up to the first hundred trains for now.
    # Could be done for each train individually, but likely not necessary for now.
    trigger_trace = dc[remi['digitizer']['source'], remi['digitizer']['key_pattern'].format(trigger_edge_channel)] \
        [:100].ndarray().mean(axis=0).astype(np.float64)
    trigger_trace -= trigger_trace[0]  # Use simple offset correction.

    fake_ppt = np.zeros(2700, dtype=np.uint32)

    discr_func, discr_params = remi.get_discriminator([trigger_edge_channel])

    edges = np.zeros(1000, dtype=np.float64)
    num_pulses = discr_func(trigger_trace, edges=edges, **discr_params[0])
    edges = edges[:num_pulses]

    first_edge = edges[0]
    rel_edges = np.round(edges - first_edge)
    edge_diff = rel_edges[1] - rel_edges[0]

    if not np.allclose(rel_edges[1:] - rel_edges[:-1], edge_diff):
        raise ValueError('PPT reconstruction for unstable edge intervals not supported')

    pulse_spacing = edge_diff / (2 * remi['digitizer']['clock_factor'])  # In units of PPT

    if not float.is_integer(pulse_spacing):
        raise ValueError('PPT reconstruction encountered non-integer pulse spacing')

    pulse_spacing = int(pulse_spacing)

    # Taken from euxfel_bunch_pattern/__init__.py
    from euxfel_bunch_pattern import DESTINATION_T4D, DESTINATION_T5D, PHOTON_LINE_DEFLECTION

    # Pick the PPT entry value written into the reconstructed table for
    # the configured SASE.
    if instrument_sase == 1:
        flag = DESTINATION_T4D
    elif instrument_sase == 2:
        flag = DESTINATION_T5D
    elif instrument_sase == 3:
        flag = DESTINATION_T4D | PHOTON_LINE_DEFLECTION
    else:
        # Fail here with a clear message instead of leaving `flag`
        # undefined for the statements that follow.
        raise ValueError(
            f'PPT reconstruction not supported for instrument_sase={instrument_sase}, '
            'expected 1, 2 or 3')

    first_pulse_offset = int(first_edge + trigger_edge_offset)  # Overwrite notebook argument.
    fake_ppt[fake_ppt_offset:fake_ppt_offset + (pulse_spacing * num_pulses):pulse_spacing] = flag

    from pasha.functor import Functor, gen_split_slices
    class FakeKeyDataFunctor(Functor):
        """Functor appearing KeyData-like with constant data.

        This functor serves a constant data row for a given number
        of train IDs the same way a KeyData object would.
        """

        def __init__(self, row, train_ids):
            """Store the constant `row` and the sequence of `train_ids`."""
            self.row = row
            self.train_ids = train_ids

        def split(self, num_workers):
            """Split the train index range into slices for `num_workers`."""
            return gen_split_slices(len(self.train_ids), n_parts=num_workers)

        def iterate(self, share):
            """Yield `(index, train_id, row)` for every index in `share`.

            `share` is a slice over the train index range. The train ID is
            looked up by the index itself, so a share that does not start
            at 0 still gets the train IDs belonging to its own indices.
            """
            for index in range(*share.indices(len(self.train_ids))):
                yield index, self.train_ids[index], self.row

    ppt_data = FakeKeyDataFunctor(fake_ppt, dc.train_ids)

    fig, ax = plt.subplots(num=99, figsize=(9, 6), clear=True, ncols=1, nrows=1)

    ax.set_title('Edge trigger signal')
    ax.plot(trigger_trace, lw=1, label=f'Mean {trigger_edge_channel} trace')
    ax.vlines(edges, trigger_trace.min()*1.1, trigger_trace.max()*1.1,
              color='red', linewidth=3, alpha=0.3, label='Edge positions')

    ax.set_xlabel('Samples')
    ax.set_ylabel('Intensity / ADU')
    ax.legend()

 else:
    ppt_data = dc[ppt_source, 'data.bunchPatternTable']
 ```

 %% Cell type:markdown id: tags:

 ### Count pulses per train and compute offsets

 %% Cell type:code id: tags:

 ``` python
 # Based on the pulse pattern tables, three global variables are obtained:
 #
 # * `pulse_counts [int32: len(dc.train_ids)]` containing the number of pulses per train.
 # * `pulse_offsets [int32: len(dc.train_ids)]` containing the global offset for the first pulse of each train.
 # * `num_pulses = pulse_counts.sum(axis=0)`

 def get_pulse_positions(ppt, sase, laser, ppl_offset):
    """Return the combined, FEL and PPL pulse positions found in `ppt`.

    FEL positions come from `indices_at_sase(ppt, sase)` and PPL positions
    from `indices_at_laser(ppt, laser)`; either is replaced by an empty
    array when the notebook-level `ignore_fel` / `ignore_ppl` flag is set.
    If there is at least one FEL position, the PPL positions are shifted
    in place by the first FEL position plus `ppl_offset`.

    Returns a tuple `(all_pos, fel_pos, ppl_pos)`, where `all_pos` is the
    sorted union of the other two.
    """
    if ignore_fel:
        fel_pos = np.array([])
    else:
        fel_pos = indices_at_sase(ppt, sase)

    if ignore_ppl:
        ppl_pos = np.array([])
    else:
        ppl_pos = indices_at_laser(ppt, laser)

    if len(fel_pos) > 0:
        # Move PPL up to the FEL position.
        ppl_pos += fel_pos[0] + ppl_offset

    all_pos = np.union1d(fel_pos, ppl_pos)
    return all_pos, fel_pos, ppl_pos

 if laser_ppt_mask < 0:
    # If laser PPT mask is not specified, try to figure it out from device IDs.
    from euxfel_bunch_pattern import PPL_BITS

    instrument = karabo_id[:karabo_id.index('_')]

    try:
        laser_ppt_mask = PPL_BITS[f'LP_{instrument}']
    except KeyError:
        raise ValueError(f'Laser PPT mask unknown for instrument `{instrument}`')

 with timing('pulse_info'):
    psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))

    # Build the pulse index
    pulse_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint64)
    has_ppt = psh.alloc(shape=len(dc.train_ids), dtype=bool, fill=False)

    def count_pulses(wid, index, tid, ppt):
        """Record the number of pulses in `ppt` for the train at `index`.

        Also marks the train in `has_ppt` as having a pulse pattern table.
        `wid` and `tid` are unused.
        """
        combined_pos, _, _ = get_pulse_positions(
            ppt, instrument_sase, laser_ppt_mask, ppl_offset)
        pulse_counts[index] = len(combined_pos)
        has_ppt[index] = True

    psh.map(count_pulses, ppt_data)

    # Fill any missing values with the highest.
    pulse_counts[has_ppt == False] = pulse_counts.max()

    # Compute offsets based on pulse counts.
    pulse_offsets = np.zeros_like(pulse_counts)
    np.cumsum(pulse_counts[:-1], out=pulse_offsets[1:])

    # Total number of pulses.
    num_pulses = int(pulse_counts.sum())
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots(num=1, ncols=1, nrows=1, figsize=(9, 4), clear=True)

 # Number of pulses over train ID.
 ax.plot(dc.train_ids, pulse_counts, lw=1)
 ax.set_title('Pulse count')
 ax.set_xlabel('Train ID')
 ax.set_ylabel('Number of pulses')

 # Always show at least up to 300 pulses, with some headroom above the maximum.
 upper_limit = max(300, pulse_counts.max() + 10)
 ax.set_ylim(0, upper_limit)

 # Plain tick labels, i.e. no scientific notation for the axis ticks.
 ax.ticklabel_format(style='plain')
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Find triggers

 The trigger defines the boundary of a pulse on the digitizer trace, which is stored per train.

 %% Cell type:code id: tags:

 ``` python
 # A trigger defines the boundary of a pulse on the digitizer trace stored per train. This cell creates a
 # global variable:
 # * `triggers [(start: int32, stop: int32, offset: float64, fel: bool, ppl: bool): num_pulses]`
 #   containing the triggers for each pulse.
 #
 # This uses the pulse pattern table to locate the pulse positions on the trace. Only the number of pulses
 # and their distance can be drawn this way, leaving the absolute offset for the very first pulse to be
 # configured via `trigger/ppt/first_pulse_offset`. If a PPL is used, it will be included in the trigger
 # pattern. The ppl_offset parameter allows taking into account an offset between PPL and FEL.

 psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))

 triggers = psh.alloc(shape=(num_pulses,), dtype=trigger_dt, fill=(-1, -1, np.nan, -1, 0, 0))

 clock_factor = remi['digitizer']['clock_factor']

 def trigger_by_ppt(worker_id, index, train_id, ppt):
    """Fill the trigger entries of a single train from its pulse pattern table.

    Writes the `start`, `stop`, `offset`, `pulse`, `fel` and `ppl` fields of
    the slice of the global `triggers` array belonging to train `index`.
    Trains without any pulses are left at their fill values.
    """
    all_pos, fel_pos, ppl_pos = get_pulse_positions(ppt, instrument_sase, laser_ppt_mask, ppl_offset)
    n_train_pulses = len(all_pos)

    if n_train_pulses == 0:
        return

    if len(ppl_pos) == 0 and ppl_offset < 0:
        # No PPL pulses, but a negative offset is configured. This will cause
        # first_pulse_offset to start early and most likely miss pulses at the
        # end, so we correct by adding the ppl_offset to relative positions
        # when computing trace positions.
        pos_corr = abs(ppl_offset)
    else:
        pos_corr = 0

    rel_pos = all_pos - all_pos[0]

    if n_train_pulses > 1:
        # Shortest distance between two neighbouring pulses, in PPT units.
        pulse_len = np.diff(rel_pos).min()
    else:
        pulse_len = single_pulse_length

    # Fractional start position on the trace, split below into an integer
    # sample index and the remaining fraction.
    start_frac = first_pulse_offset + (rel_pos + pos_corr) * 2 * clock_factor
    start_int = start_frac.astype(int)

    pulse_offset = pulse_offsets[index]
    pulse_count = pulse_counts[index]

    train_triggers = triggers[pulse_offset:pulse_offset+pulse_count]
    train_triggers['start'] = start_int + pulse_start_offset
    train_triggers['stop'] = start_int + int(pulse_len * 2 * clock_factor) - 1 + pulse_end_offset
    train_triggers['offset'] = start_frac - start_int
    train_triggers['pulse'] = all_pos.astype(np.int16)

    # Vectorized membership test instead of one Python-level scan per pulse.
    train_triggers['fel'] = np.isin(all_pos, fel_pos)
    train_triggers['ppl'] = np.isin(all_pos, ppl_pos)


 if ignore_fel and ignore_ppl:
    # Both FEL and PPL are ignored, use virtual full train triggers.
    print('WARNING: Both FEL and PPL pulses are ignored, '
          'virtual trigger is inserted covering the entire train')

    # Overwrite global pulse statistics computed before, using exactly one
    # virtual pulse per train.
    num_pulses = len(dc.train_ids)
    triggers = np.empty(num_pulses, dtype=trigger_dt)

    pulse_counts[:] = 1
    pulse_counts = pulse_counts.astype(np.int32)
    pulse_offsets = np.arange(len(pulse_counts)).astype(np.int32)

    # Obtain minimal trace length across all sources of all detectors.
    min_trace_len = min(
        dc[src, key].entry_shape[0]
        for det_name in remi['detector'].keys()
        for src, key in remi.get_detector_sourcekeys(det_name)
    )

    triggers['start'] = first_pulse_offset
    triggers['stop'] = min_trace_len
    triggers['offset'] = 0.0
    triggers['pulse'] = -1
    triggers['fel'] = False
    triggers['ppl'] = False

 else:
    with timing('find_triggers'):
        psh.map(trigger_by_ppt, ppt_data)

    if (np.unique(triggers['pulse'][1:] - triggers['pulse'][:-1]) > 0).sum() > 1:
        # There is more than one delta between pulse entries across all pulses. This is not
        # necessarily a problem, as the pattern could simply have changed in between trains
        # with each train being split properly.
        # If there's more than one delta in a single train, this likely points to a mismatch
        # of FEL and PPL repetition rate. This is most likely not intended.

        one = np.uint64(1)  # Because np.uint64 + int = np.float64
        pulse_deltas = set()

        for offset, count in zip(pulse_offsets, pulse_counts):
            deltas = triggers['pulse'][offset+one:offset+count] - triggers['pulse'][offset:offset+count-one]

            # Only collect deltas of trains which are not uniform in themselves.
            if len(np.unique(deltas)) > 1:
                pulse_deltas.update(deltas)

        if len(pulse_deltas) > 1:
            delta_str = ', '.join([str(x) for x in sorted(pulse_deltas)])
            warning(f'Different pulse lengths (PPT: {delta_str}) encountered within single trains, '
                    f'separated pulse spectra may split up signals!')
        else:
            warning('Different pulse lengths encountered across trains, separation may be unstable!')
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, (lx, rx) = plt.subplots(num=2, ncols=2, nrows=1, figsize=(9, 4), clear=True,
                             gridspec_kw=dict(top=0.75))

 # Display the trains up to roughly 200 pulses, but at least 5 trains.
 # The offsets may be unsigned, so they are converted to a signed type before
 # subtracting to avoid wrap-around for offsets below 200.
 n_trains = max(int(np.abs(pulse_offsets.astype(np.int64) - 200).argmin()), 5)

 # With fewer trains than that, show all pulses rather than indexing past the
 # end of the offsets.
 if n_trains < len(pulse_offsets):
    visible_end = int(pulse_offsets[n_trains])
 else:
    visible_end = len(triggers)

 visible_triggers = triggers[:visible_end]

 pulse_index = np.arange(len(visible_triggers))
 pumped = visible_triggers['fel'] & visible_triggers['ppl']
 fel_only = visible_triggers['fel'] & ~pumped
 ppl_only = visible_triggers['ppl'] & ~pumped

 lx.plot(pulse_index[pumped], visible_triggers[pumped]['start'], ' .', ms=3, c='C0', label='FEL+PPL')
 lx.plot(pulse_index[fel_only], visible_triggers[fel_only]['start'], '.', ms=3, c='C1', label='FEL-only')
 lx.plot(pulse_index[ppl_only], visible_triggers[ppl_only]['start'], '.', ms=2, c='C2', label='PPL-only')

 max_start = visible_triggers['start'].max()

 # Mark the first pulse of each displayed train.
 lx.vlines(pulse_offsets[:n_trains], 0, max_start, color='grey', linewidth=1, alpha=0.2)
 lx.tick_params(right=True)

 lx.set_xlabel('Pulse index')
 lx.set_xlim(-15, visible_end+15)

 lx.set_ylabel('Trigger position')
 lx.set_ylim(-max_start // 20, max_start + max_start // 20)

 lx.legend(fontsize='small', loc='lower right')

 # Secondary x axis labelling the train boundaries with their train IDs.
 train_lx = lx.twiny()
 train_lx.set_xlabel('Train ID', labelpad=8)
 train_lx.set_xlim(lx.get_xlim())
 train_lx.set_xticks(pulse_offsets[:n_trains])
 train_lx.set_xticklabels([str(int(x)) for x in dc.train_ids[:n_trains]],
                         rotation=-45, fontsize='x-small')

 # Trigger start positions of all pulses in the run.
 rx.plot(triggers['start'], lw=0.2)

 rx.set_xlabel('Pulse index')
 rx.tick_params(left=False, labelleft=False, right=True, labelright=True)

 pass
 ```

 %% Cell type:markdown id: tags:

 # Analog signal to digital edges

 %% Cell type:markdown id: tags:

 ### Find edges in analog signal

 %% Cell type:code id: tags:

 ``` python
 psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_edges))
 threadpool_limits(limits=remi.get_num_workers(mt_avg_trace))

 det_data = {}

 for i, (det_name, det) in enumerate(remi['detector'].items()):
    det_sourcekeys = remi.get_detector_sourcekeys(det_name)
    det_get_traces = remi.get_traces_getter(det_name)
    trace_len = dc[next(iter(det_sourcekeys))].entry_shape[0]

    edges = psh.alloc(shape=(num_pulses, 7, det['max_hits']),
                      dtype=np.float64, fill=np.nan)
    amplitudes = psh.alloc(shape=(num_pulses, 7, det['max_hits']),
                           dtype=np.float64, fill=np.nan)
    avg_traces = psh.alloc_per_worker(shape=(7, trace_len), dtype=np.float64)

    def prepare_edge_worker(worker_id):
        """Prepare the per-worker state used by `find_edges`.

        Registered via `psh.with_init`; the names bound here (`correct_func`,
        `discr_func`, `discr_params`, `source_name`, `bl_start`, `bl_stop`,
        `traces_corr`, `baselines`) are used as free names in `find_edges`.
        Presumably `psh.with_init` makes them available there - confirm
        against the pasha documentation.
        """
        correct_func = remi.get_baseline_corrector()
        discr_func, discr_params = remi.get_discriminator(det['channels'])

        source_name = remi['digitizer']['source']
        bl_start, bl_stop, _ = remi.get_baseline_limits(trace_len)
        bl_sym = remi['digitizer']['baseline_symmetry']
        # NOTE(review): time_cal is not used by find_edges in this cell.
        time_cal = remi.get_time_calibration()

        # Scratch buffers for the corrected traces and the baselines.
        traces_corr = np.empty((7, trace_len), dtype=np.float64)
        baselines = np.empty(bl_sym, dtype=np.float64)
        yield

    @psh.with_init(prepare_edge_worker)
    def find_edges(worker_id, index, train_id, data):
        """Baseline-correct the traces of one train and discriminate edges per pulse.

        Results are written into the `edges` and `amplitudes` arrays for the
        pulses of train `index`; the corrected traces are accumulated into
        the per-worker `avg_traces` buffer.
        """
        try:
            data = det_get_traces(data[source_name])
        except KeyError:
            # No data for the digitizer source in this train, skip it.
            return

        # Baseline correction of each of the 7 channels into traces_corr.
        for channel_idx in range(7):
            correct_func(data[channel_idx], traces_corr[channel_idx],
                         baselines, bl_start, bl_stop)

        avg_traces[worker_id] += traces_corr

        # Range of global pulse indices belonging to this train.
        pulses_slice = np.s_[pulse_offsets[index]:pulse_offsets[index]+pulse_counts[index]]

        for trigger, pulse_edges, pulse_amplitudes in zip(
            triggers[pulses_slice], edges[pulses_slice], amplitudes[pulses_slice]
        ):
            # Part of the trace belonging to this pulse.
            trigger_slice = np.s_[trigger['start']:trigger['stop']]

            # Run the discriminator per channel, writing into the output rows.
            for trace, channel_params, channel_edges, channel_amplitudes in zip(
                traces_corr, discr_params, pulse_edges, pulse_amplitudes
            ):
                discr_func(trace[trigger_slice], edges=channel_edges,
                           amplitudes=channel_amplitudes, **channel_params)

    with timing(f'find_edges, {det_name}'):
        psh.map(find_edges, dc.select(det_sourcekeys))

    if not np.isfinite(edges).any():
        warning(f'No edges found for {det_name}')

    fig, (ux, bx) = plt.subplots(num=110+i, ncols=1, nrows=2, figsize=(9.5, 8), clear=True,
                                 gridspec_kw=dict(left=0.1, right=0.98, top=0.98, bottom=0.1, hspace=0.25))

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        ux.hist(finite_flattened_slice(amplitudes, np.s_[:, edge_idx, :]),
                bins=1000, range=(0, 2048), histtype='step', lw=1,
                color=f'C{edge_idx}' if edge_idx < 6 else 'k', label=edge_name)

        cur_edges = finite_flattened_slice(edges, np.s_[:, edge_idx, :])
        bx.hist(cur_edges - np.floor(cur_edges), bins=500, range=(0, 1), histtype='step',
                lw=1, color=f'C{edge_idx}' if edge_idx < 6 else 'k', label=edge_name)

    ux.legend()
    ux.set_title('Pulse height distributions')
    ux.set_xlabel('Pulse height')
    ux.set_yscale('log')
    ux.set_xlim(0, 2048)
-    ux.set_ylim(10, 1.5*ux.get_xlim()[1])
+    ux.set_ylim(10, 1.5*ux.get_ylim()[1])

    bx.set_title('Fractional edge distributions')
    bx.set_xlabel('Edge positions - ⌊edge positions⌋')
    bx.set_yscale('log')
    bx.set_xlim(-0.05, 1.2)
    bx.legend()

    # Properly offset edges to their trigger offset and convert to time.
    # This is not done earlier to preserve the information for plotting.
    edges += triggers['offset'][:, None, None]
    edges *= remi.get_time_calibration()

    det_data[det_name] = {
        'edges': edges,
        'amplitudes': amplitudes,
        'avg_trace': avg_traces.sum(axis=0) / len(dc.train_ids)
    }
 ```

 %% Cell type:markdown id: tags:

 ### Global average of analog signals

 %% Cell type:code id: tags:

 ``` python
 for i, det_name in enumerate(remi['detector'].keys()):
    # One row per digitizer channel of this detector.
    fig, axs = plt.subplots(
        num=10+i, nrows=7, figsize=(9.5, 8), clear=True,
        gridspec_kw=dict(left=0.1, right=0.98, top=0.98, bottom=0.1))
    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    avg_trace = det_data[det_name]['avg_trace']
    channel_names = ['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']

    for edge_idx, edge_name in enumerate(channel_names):
        channel_ax = axs[edge_idx]
        channel_ax.plot(avg_trace[edge_idx], lw=1)
        channel_ax.set_ylabel(edge_name)
        channel_ax.tick_params(labelbottom=False)

    # Only the bottom-most axes shows tick labels on its x axis.
    axs[-1].tick_params(labelbottom=True)
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Sample for found edges

 %% Cell type:code id: tags:

 ``` python
 # For each detector and channel, plot the trace of the first pulse with at
 # least one edge found and mark the edge positions on top of it.
 for i, det_name in enumerate(remi['detector'].keys()):
    edges = det_data[det_name]['edges']

    fig = plt.figure(num=100+i, figsize=(9.5, 8))
    grid = fig.add_gridspec(ncols=2, nrows=4, left=0.1, right=0.98, top=0.98, bottom=0.1)

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    for signal_idx, signal_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        # The six anode signals fill rows 1-3 in two columns, the MCP signal
        # (index 6) spans the entire top row.
        row = (1 + signal_idx // 2) if signal_idx < 6 else 0
        col = (signal_idx % 2) if signal_idx < 6 else np.s_[:]
        ax = fig.add_subplot(grid[row, col])

        # Pulses whose first edge slot on this channel is filled.
        finite_edges = np.isfinite(edges[:, signal_idx, 0])
        if not finite_edges.any():
            warning(f'No edges found for {det_name}/{signal_name}')
            continue

        pulse_idx = np.uint64(finite_edges.nonzero()[0][0])  # Is combined with other uint64 values below.
        # Last train whose pulse offset does not exceed the pulse index.
        train_idx = (pulse_idx >= pulse_offsets).nonzero()[0][-1]
        trigger = triggers[pulse_idx]

        sourcekey = remi.get_channel_sourcekey(
            remi['detector'][det_name]['channels'][signal_idx])
        train_trace = dc[sourcekey].select_trains(np.s_[train_idx:train_idx+1]).ndarray()[0]
        corr_trace = np.zeros_like(train_trace, dtype=np.float64)

        # Baseline-correct the raw trace into corr_trace.
        remi.get_baseline_corrector()(
            train_trace, corr_trace,
            np.empty(remi['digitizer']['baseline_symmetry'], dtype=np.float64),
            *remi.get_baseline_limits(len(train_trace))[:2])

        pulse_trace = corr_trace[np.s_[trigger['start']:trigger['stop']]]

        # Time axis for the samples of this pulse, shifted by the trigger offset.
        x_time = remi.get_time_calibration() * (np.arange(len(pulse_trace)) + trigger['offset'])

        ax.plot(x_time, pulse_trace, lw=1)
        ax.set_xlim(x_time[0], x_time[-1])
        ax.set_ylim(-200, pulse_trace.max()*1.1)
        # Label with the train index and the pulse index within that train.
        ax.text(x_time[-1], pulse_trace.max(),
                f'T{train_idx} P{pulse_idx - pulse_offsets[train_idx]} ',
                va='top', ha='right')
        ax.tick_params(labelbottom=False)
        ax.set_ylabel(signal_name)

        ax.vlines(edges[pulse_idx, signal_idx, :], *ax.get_ylim(), color='red', linewidth=1)
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Digitized channel spectra

 %% Cell type:code id: tags:

 ``` python
 for i, det_name in enumerate(remi['detector'].keys()):
    fig = plt.figure(num=20+i, figsize=(9.5, 6))

    edges = det_data[det_name]['edges']

    min_edge = np.nanmin(edges)
    max_edge = np.nanmax(edges)

    grid = fig.add_gridspec(ncols=3, nrows=3, left=0.08, right=0.98, top=0.95, hspace=0.4)

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    numx = fig.add_subplot(grid[0, 0])
    numx.set_title('Edges per pulse')

    # Average the edge counts over about 60 windows. With fewer than 60 pulses
    # the integer division yields zero, so fall back to a window of one pulse
    # to avoid a division by zero below.
    agg_window = max(num_pulses // 60, 1)
    max_num_edges = 0.0
    max_spectral_intensity = 0
    hist_axs = []

    for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        # Anode signals fill the lower two rows, the MCP signal (index 6)
        # spans the remaining columns of the top row.
        if edge_idx < 6:
            row = 1 + edge_idx % 2
            col = edge_idx // 2
        else:
            row = 0
            col = np.s_[1:3]

        ax = fig.add_subplot(grid[row, col])
        ax.set_title(f'TOF spectrum: {edge_name}')

        # Number of edges per pulse, truncated to a multiple of the window
        # size and then averaged per window.
        num_edges = np.isfinite(edges[:, edge_idx, :]).sum(axis=1)
        num_edges = num_edges[:((len(num_edges) // agg_window) * agg_window)]
        num_edges = num_edges.reshape(-1, agg_window).mean(axis=1)

        if (num_edges == 0).all():
            warning(f'No edges found for {det_name}/{edge_name}')
            continue

        if edge_idx < 6:
            plot_kwargs = dict(c=f'C{edge_idx}', ls='solid', lw=1.0)
        else:
            plot_kwargs = dict(c='k', ls='dashed', lw=1.0)

        numx.plot(np.arange(len(num_edges)) * agg_window, num_edges, label=edge_name, **plot_kwargs)
        max_num_edges = max(max_num_edges, num_edges.max())

        # One bin per 5 units of the edge range, but always at least one bin
        # in case the range is narrower than that.
        y, _, _ = ax.hist(finite_flattened_slice(edges, np.s_[:, edge_idx, :]),
                          bins=max(int((max_edge - min_edge) // 5), 1), range=(min_edge, max_edge),
                          color=plot_kwargs['c'], histtype='step', linewidth=1)
        hist_axs.append(ax)

        max_spectral_intensity = max(max_spectral_intensity, y.max())

    numx.tick_params(labelbottom=False)
    numx.set_ylim(0, 1.2*max_num_edges)

    # Use a common y scale for all spectra.
    for ax in hist_axs:
        ax.set_ylim(0, max_spectral_intensity*1.1)
        ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3))
 pass
 ```

 %% Cell type:markdown id: tags:

 # Detector diagnostics

 %% Cell type:code id: tags:

 ``` python
 for i, det_name in enumerate(remi['detector'].keys()):
    edges = det_data[det_name]['edges']
    det_cfg = remi['detector'][det_name]

    sort = remi.get_dld_sorter(det_name)

    # All-zero sum shifts are passed on as None.
    if sort.sum_shifts != (0.0, 0.0, 0.0):
        sum_shifts = sort.sum_shifts
    else:
        sum_shifts = None

    is_valid = remi.get_presort_mask(
        edges, edge_idx=0, w=not quad_anode,
        sum_limit=max(sort.uncorrected_time_sum_half_widths),
        sum_shifts=sum_shifts)

    if not is_valid.any():
        warning(f'No valid preliminary edge combinations found for {det_name}')

    # Diagnostics before any sorter corrections are applied.
    signals, sums = remi.get_signals_and_sums(
        edges, indices=sort.channel_indices, sum_shifts=sum_shifts, mask=is_valid)
    fig = plot_detector_diagnostics(
        signals=signals, sums=sums, fig_num=30+i, im_scale=1.5,
        sum_range=max(sort.uncorrected_time_sum_half_widths), sorter=sort)
    fig.text(0.02, 0.98, det_name.upper() + ' before corrections', rotation=90, ha='left', va='top', size='x-large')

    if det_cfg['use_sum_correction'] or det_cfg['use_pos_correction']:
        # Diagnostics after corrections, filled by the sorter for valid entries.
        n_masked = is_valid.sum()
        signals = np.full((n_masked, 3), np.nan, dtype=np.float64)
        sums = np.full((n_masked, 3), np.nan, dtype=np.float64)

        sort.correct(edges[is_valid], signals, sums)
        fig = plot_detector_diagnostics(
            signals=signals, sums=sums, fig_num=40+i, im_scale=1.5,
            sum_range=max(sort.uncorrected_time_sum_half_widths), sorter=sort)
        fig.text(0.02, 0.98, det_name.upper() + ' after corrections', rotation=90, ha='left', va='top', size='x-large')
 pass
 ```

 %% Cell type:markdown id: tags:

 # Hit reconstruction

 %% Cell type:code id: tags:

 ``` python
 # Reconstruct hits from the edges of each detector, one train per kernel call.
 psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_rec_hits))

 for det_name, det in remi['detector'].items():
    edges = det_data[det_name]['edges']

    # Output arrays with 50 slots per pulse, initialized to "empty" entries.
    signals = psh.alloc(shape=(num_pulses, 50), dtype=signal_dt, fill=np.nan)
    hits = psh.alloc(shape=(num_pulses, 50), dtype=hit_dt, fill=(np.nan, np.nan, np.nan, -1))
    hit_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint32)

    def prepare_hit_worker(worker_id):
        # `sort` is used as a free name in reconstruct_hits; presumably made
        # available there by psh.with_init - confirm.
        sort = remi.get_dld_sorter(det_name)
        yield

    @psh.with_init(prepare_hit_worker)
    def reconstruct_hits(worker_id, index, train_id):
        # Run the sorter on the pulse range of this train and accumulate its
        # return value as the hit count of the train.
        hit_counts[index] += sort.run_on_train(
            edges, signals, hits, pulse_offsets[index], pulse_offsets[index] + pulse_counts[index])

    with timing(f'rec_hits, {det_name}'):
        psh.map(reconstruct_hits, dc.train_ids)

    det_data[det_name].update(signals=signals, hits=hits, hit_counts=hit_counts)

 %% Cell type:code id: tags:

 ``` python
 # A single figure is shared by all detectors, so it uses a fixed figure number
 # instead of depending on a loop index left over from an earlier cell.
 fig, ax = plt.subplots(num=50, figsize=(9.5, 4), ncols=1, clear=True,
                       gridspec_kw=dict(top=0.92, right=0.98, left=0.05, bottom=0.12))

 max_num_hits = 0.0

 # Aggregate the pulses into at most about 1000 windows; this is the same for
 # every detector.
 agg_window = num_pulses // min(1000, num_pulses)

 for det_name in remi['detector'].keys():
    # Number of filled hit slots per pulse, truncated to a multiple of the
    # window size and then averaged per window.
    num_hits = np.isfinite(det_data[det_name]['hits']['x']).sum(axis=1)
    num_hits = num_hits[:(len(num_hits) // agg_window) * agg_window]
    num_hits = num_hits.reshape(-1, agg_window).mean(axis=1)
    max_num_hits = max(max_num_hits, num_hits.max())

    ax.plot(np.arange(0, (num_pulses // agg_window) * agg_window, agg_window), num_hits,
            lw=1, label=det_name.upper())

 ax.set_title('Hits per pulse')
 ax.set_xlabel('Pulse index')
 ax.set_ylim(0, max_num_hits*1.1)
 ax.legend()
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Reconstruction methods
 Each hit may be reconstructed by one of 20 different methods. These differ by the number of real signals across the channels, which could be combined to form the hit. Each of these methods is designated by a number between `0` and `19` (with empty hits using `-1`), which can be found in the `m` key of a hit, e.g.:

 * `0`: All six anode signals and the corresponding MCP signal were found.
 * `4`: One signal on layer `u` is missing, all other signals for this event were found.
 * `18`: Only one anode signal on each layer was found and the MCP signal is missing. There is no way to check whether this combination of signals is actually valid.

 | Method | `u+v+w +mcp` |
 | - | - |
 | 0 | `2+2+2 +1` |
 | 1 | `0+2+2 +1` |
 | 2 | `2+0+2 +1` |
 | 3 | `2+2+0 +1` |
 | 4 | `1+2+2 +1` (2 permutations) |
 | 5 | `2+1+2 +1` (2 permutations) |
 | 6 | `2+2+1 +1` (2 permutations) |
 | 7 | `2+2+2 +0` |
 | 8 | `0+2+2 +0` |
 | 9 | `2+0+2 +0` |
 | 10 | `2+2+0 +0` |
 | 11 | `1+2+2 +0` (2 permutations) |
 | 12 | `2+1+2 +0` (2 permutations) |
 | 13 | `2+2+1 +0` (2 permutations) |
 | 14 | `2+1+1 +1` `1+2+1 +1` `1+1+2 +1` (12 permutations) |
 | 15 | `2+1+0 +1` `2+0+1 +1` `1+2+0 +1` `1+0+2 +1` `0+2+1 +1` `0+1+2 +1` (12 permutations) |
 | 16 | `1+1+1 +1` (8 permutations) |
 | 17 | `2+1+1 +0` `1+2+1 +0` `1+1+2 +0` (12 permutations) |
 | 18 | `1+1+1 +0` (8 permutations) |
 | 19 | `2+1+0 +0` `2+0+1 +0` `1+2+0 +0` `1+0+2 +0` `0+1+2 +0` `0+2+1 +0` (12 permutations) |

 * For hits reconstructed with method `> 10`, extra attention should be given to ensure they add meaningful signal.
 * Any method `> 14` has to be considered risky, because neither a time sum nor the position can be checked. If the scale factors and/or `w` shift are not correct, then the number of events reconstructed with the risky methods will increase. They will most likely be *ghost hits*, which do not correspond to actual impacts on the detector.

 %% Cell type:code id: tags:

 ``` python
 for i, det_name in enumerate(remi['detector'].keys()):
    hits = det_data[det_name]['hits']

    fig, ax = plt.subplots(num=60+i, figsize=(9.5, 5), ncols=1, clear=True,
                           gridspec_kw=dict(left=0.08, right=0.91, top=0.8))

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    if not (hits['m'] >= 0).any():
        warning(f'No hits found for {det_name}')
        continue

    # Number of hits per reconstruction method, ignoring empty hits (-1).
    method_bins = np.bincount(hits['m'][hits['m'] >= 0], minlength=20)
    ax.bar(np.arange(20), method_bins, width=0.5)

    ax.set_xlabel('Reconstruction method')
    ax.set_xlim(-0.5, 19.5)
    ax.set_xticks(np.arange(20))

    ax.set_ylabel('Number of hits')
    ax.set_ylim(0, method_bins.max()*1.05)
    ylims = ax.get_ylim()

    ax.tick_params(which='both', right=True, labelright=True)

    # Methods 15 and above are the risky ones shaded in red below.
    num_risky = method_bins[15:].sum()
    num_total = method_bins.sum()

    # Three significant digits, as two would render 100 % as `1e+02%`.
    ax.text(14.2, method_bins.max(), f'{(100*(num_total-num_risky)/num_total):.3g}%',
            va='top', ha='right', color='black')
    ax.text(14.8, method_bins.max(), f'{(100*num_risky/num_total):.3g}%',
            va='top', ha='left', color='red')

    ax.fill([14.5, 19.5, 19.5, 14.5], [ylims[0], ylims[0], ylims[1], ylims[1]], c='r', alpha=0.2)

    # Secondary x axis showing the signal combination of each method.
    labelx = ax.twiny()
    labelx.set_xlim(*ax.get_xlim())
    labelx.set_xticks(ax.get_xticks())
    labelx.set_xticklabels([
        '2+2+2 +1',
        '0+2+2 +1', '2+0+2 +1', '2+2+0 +1',
        '1+2+2 +1', '2+1+2 +1', '2+2+1 +1',
        '2+2+2 +0',
        '0+2+2 +0', '2+0+2 +0', '2+2+0 +0', '1+2+2 +0', '2+1+2 +0', '2+2+1 +0',
        '2+1+1 +1',
        '2+1+0 +1',
        '1+1+1 +1',
        '2+1+1 +0',
        '1+1+1 +0',
        '2+1+0 +0',
    ], rotation=90)

    # Largest multiple of 10 % not exceeding the most populated method.
    max_rel_tick = np.floor((method_bins.max() / num_total) / 0.1) * 0.1

    # Secondary y axis showing the fraction of the total number of hits.
    rely = ax.twinx()
    rely.set_ylim(*ax.get_ylim())
    rely.set_yticks(np.arange(0.0, max_rel_tick+0.01, 0.1)*num_total)
    rely.set_yticks(np.arange(0.0, ylims[1]/num_total, 0.02)*num_total, minor=True)
    rely.set_yticklabels([f'{(y/num_total)*100:.0f}%' for y in rely.get_yticks()])
    rely.set_ylabel('Percentage of total hits')
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Detector image and fishes

 %% Cell type:code id: tags:

 ``` python
 # Detector image (X/Y) and the X and Y positions over time-of-flight
 # ("fishes") for each detector.
 for i, det_name in enumerate(remi['detector'].keys()):
    # Flatten all hits, drop the empty slots and keep methods up to 10 only.
    flat_hits = det_data[det_name]['hits'].reshape(-1)
    flat_hits = flat_hits[np.isfinite(flat_hits[:]['x'])]
    flat_hits = flat_hits[flat_hits['m'] <= 10]

    fig = plt.figure(num=70+i, figsize=(9, 13.5))

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')
    fig.text(0.02, 0.02, det_name.upper(), rotation=90, ha='left', va='bottom', size='x-large')

    # Axes for the image (top) and the X/Y over time-of-flight plots (below).
    imp = fig.add_axes([0.1 + 0.25/2, 0.56, 0.6, 0.4])
    txp = fig.add_axes([0.1, 0.28, 0.85, 0.22])
    typ = fig.add_axes([0.1, 0.04, 0.85, 0.22])

    if flat_hits.size == 0:
        warning(f'No hits found for {det_name}')
        continue

    # Plot range of 10 % beyond the configured MCP radius.
    im_radius = remi['detector'][det_name]['mcp_radius']*1.1

    imp.hist2d(flat_hits['x'], flat_hits['y'], bins=(256, 256),
               range=[[-im_radius, im_radius], [-im_radius, im_radius]], norm=LogNorm())
    imp.xaxis.set_label_position('top')
    imp.set_xlabel('X / mm')
    imp.set_ylabel('Y / mm')
    imp.tick_params(right=True, labelright=True, top=True, labeltop=True)
    imp.grid()

    min_tof = flat_hits['t'].min()
    max_tof = flat_hits['t'].max()

    # One bin per 5 units of time-of-flight.
    num_tof_bins = int((max_tof - min_tof) // 5)

    if num_tof_bins == 0:
        warning(f'All TOFs limited to single bin for {det_name}')
        continue

    for ax, dim_label in zip([txp, typ], ['x', 'y']):
        ax.hist2d(flat_hits['t'], flat_hits[dim_label], bins=(num_tof_bins, 256),
                   range=[[min_tof, max_tof], [-im_radius, im_radius]], norm=LogNorm())
        ax.set_ylabel(f'{dim_label.upper()} / mm')

    typ.set_xlabel('Time-of-flight / ns')
    txp.tick_params(bottom=True, labelbottom=False, top=True, labeltop=True, right=True, labelright=True)
    typ.tick_params(right=True, labelright=True, top=True)
 pass
 ```

 %% Cell type:markdown id: tags:

 # Transformed data files

 %% Cell type:code id: tags:

 ``` python
 # Try to figure out proposal number from in_folder to work with older files.
 m = re.match(r'p(\d{6})', Path(in_folder).parts[-2])
 if not proposal and m is not None:
    proposal = int(m[1])

 # Fall back to the number of trains in the first input file if no explicit
 # output sequence length is configured.
 seq_len = out_seq_len if out_seq_len > 0 else len(dc.files[0].train_ids)

 # Collect all `dataset_compression*` parameters, with the `dataset_` prefix
 # (8 characters) stripped from their names.
 dataset_kwargs = {k[8:]: v for k, v in locals().items() if k.startswith('dataset_compression')}

 control_sources = [det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())
                   for det_name in remi['detector']]

 # The raw channel has to be listed whenever any raw key is written, which
 # includes the amplitudes.
 channels = []
 if save_raw_triggers or save_raw_edges or save_raw_amplitudes:
    channels.append('raw')
 if save_rec_signals or save_rec_hits:
    channels.append('rec')

 instrument_channels = [
    f'{device_id}:{det_output_key}/{channel}'
    for device_id in control_sources
    for channel in channels
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 Path(out_folder).mkdir(parents=True, exist_ok=True)
 print('Writing sequence files', flush=True, end='')

 # Use the timing context manager via `with`, so it is also left properly if
 # writing fails.
 with timing('write_files'):
    for seq_id, train_mask, pulse_mask in sequence_pulses(dc.train_ids, pulse_counts, pulse_offsets, seq_len):
        seq_train_ids = dc.train_ids[train_mask]

        with DataFile.from_details(out_folder, out_aggregator, run, seq_id) as outp:
            outp.create_metadata(like=dc, proposal=proposal, run=run, sequence=seq_id,
                                 control_sources=control_sources, instrument_channels=instrument_channels)
            outp.create_index(
                seq_train_ids,
                timestamps=dc.select_trains(by_id[seq_train_ids]).train_timestamps().astype(np.uint64)
            )

            for det_name in remi['detector']:
                cur_device_id = det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())
                cur_max_hits = remi['detector'][det_name]['max_hits']

                cur_control_data = outp.create_control_source(cur_device_id)
                # Manually manipulate the file here, still creates the index properly.
                remi.attach_detector_config(det_name, cur_control_data.get_run_group())
                cur_control_data.create_index(len(seq_train_ids))

                cur_fast_data = outp.create_instrument_source(f'{cur_device_id}:{det_output_key}')

                cur_data = det_data[det_name]

                # For all keys below except the triggers, the last chunk
                # dimension is limited to the detector's max_hits.
                if save_raw_triggers:
                    cur_fast_data.create_key(
                        'raw.triggers', triggers[pulse_mask],
                        maxshape=(None,) + triggers.shape[1:],
                        chunks=tuple(chunks_triggers), **dataset_kwargs)

                if save_raw_edges:
                    cur_fast_data.create_key(
                        'raw.edges', cur_data['edges'][pulse_mask],
                        maxshape=(None,) + cur_data['edges'].shape[1:],
                        chunks=tuple(chunks_edges if chunks_edges[-1] <= cur_max_hits
                                     else chunks_edges[:-1] + [cur_max_hits]),
                        **dataset_kwargs)

                if save_raw_amplitudes:
                    cur_fast_data.create_key(
                        'raw.amplitudes', cur_data['amplitudes'][pulse_mask],
                        maxshape=(None,) + cur_data['amplitudes'].shape[1:],
                        chunks=tuple(chunks_amplitudes if chunks_amplitudes[-1] <= cur_max_hits
                                     else chunks_amplitudes[:-1] + [cur_max_hits]),
                        **dataset_kwargs)

                if save_rec_signals:
                    cur_fast_data.create_key(
                        'rec.signals', cur_data['signals'][pulse_mask],
                        maxshape=(None,) + cur_data['signals'].shape[1:],
                        chunks=tuple(chunks_signals if chunks_signals[-1] <= cur_max_hits
                                     else chunks_signals[:-1] + [cur_max_hits]),
                        **dataset_kwargs)

                if save_rec_hits:
                    # Take the shape from this detector's own hits rather than
                    # from a `hits` variable left over from an earlier cell.
                    cur_fast_data.create_key(
                        'rec.hits', cur_data['hits'][pulse_mask],
                        maxshape=(None,) + cur_data['hits'].shape[1:],
                        chunks=tuple(chunks_hits if chunks_hits[-1] <= cur_max_hits
                                     else chunks_hits[:-1] + [cur_max_hits]),
                        **dataset_kwargs)

                cur_fast_data.create_index(raw=pulse_counts[train_mask], rec=pulse_counts[train_mask])

        # One dot of progress per sequence file.
        print('.', flush=True, end='')

    print('')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Data selection parameters.
 run = 104  # Run ID.
 in_folder = '/gpfs/exfel/exp/SQS/202101/p002535/raw'  # Partial input path appended with run ID.
 out_folder = '/gpfs/exfel/exp/SQS/202101/p002535/scratch/cal_test'  # Full path to output folder.

 calib_config_path = '/gpfs/exfel/exp/SQS/202101/p002535/usr/config_board2+4.yaml'  # Path to correction and transform configuration

 # These parameters are required by xfel-calibrate but ignored in this notebook.
 cycle = ''  # Proposal cycle, currently not used.
 cal_db_timeout = 0  # Calibration DB timeout, currently not used.
 cal_db_interface = 'foo'  # Calibration DB interface, currently not used.
 karabo_da = 'bar'  # Karabo data aggregator name, currently not used

 # Output parameters.
 karabo_id = 'SQS_REMI_DLD6'  # Karabo device ID root for virtual output device.
 proposal = ''  # Proposal, leave empty for auto detection based on in_folder
 out_aggregator = 'REMI01'  # Aggregator name for output files.
 out_seq_len = 5000  # Number of trains per sequence file in output.
 det_device_id = '{karabo_id}/DET/{det_name}'  # Karabo device ID for virtual output device.
 det_output_key = 'output'  # Pipeline name for fast data output.
 save_raw_triggers = True  # Whether to save trigger position in files.
 save_raw_edges = True  # Whether to save digitized edge positions in files.
 save_raw_amplitudes = True  # Whether to save analog pulse amplitudes in files.
 save_rec_signals = True  # Whether to save reconstructed signals (u1-w2, mcp) in files.
 save_rec_hits = True  # Whether to save reconstructed hits (x,y,t,m) in files.
 chunks_triggers = [500]  # HDF chunk size for triggers.
 chunks_edges = [500, 7, 50]  # HDF chunk size for edges.
 chunks_amplitudes = [500, 7, 50]  # HDF chunk size for amplitudes.
 chunks_hits = [50, 50]  # HDF chunk size for hits.
 chunks_signals = [50, 50]  # HDF chunk size for signals.
 dataset_compression = 'gzip'  # HDF compression method.
 dataset_compression_opts = 3  # HDF GZIP compression level.

 # Detector parameters.
 quad_anode = False  # Reconstruction assumes a hex anode by default, change for quad anodes.

 # Trigger parameters.
 ppt_source = 'SQS_RR_UTC/TSYS/TIMESERVER:outputBunchPattern'  # Source the pulse pattern table (PPT) is read from.
 ignore_fel = False  # Ignore any FEL entries in the PPT.
 ignore_ppl = False  # Ignore any PPL entries in the PPT.
 ppl_offset = 0  # In units of the PPT.
 laser_ppt_mask = -1  # Bit mask for used laser, negative to auto-detect from instrument.
 instrument_sase = 3  # Which SASE we're running at for PPT decoding.
 first_pulse_offset = 10000  # Sample position where the first pulse begins, ignored when PPT is reconstructed.
 single_pulse_length = 25000  # How many samples if there's only one pulse.
 pulse_start_offset = 0  # Signal offset at the start of each pulse.
 pulse_end_offset = 0  # Signal offset at the end of each pulse.

 # PPT reconstruction parameters.
 reconstruct_ppt = False  # Reconstruct PPT from some trigger edges.
 trigger_edge_channel = '4_D'  # Channel to use for triggering.
 trigger_edge_offset = 0  # Offset to apply to the first trigger edge position to compute first pulse offset.
 fake_ppt_offset = 0  # Offset in reconstructed PPT for pulses.

 # Parallelization parameters.
 mp_find_triggers = 0.5  # Parallelization for finding triggers.
 mp_find_edges = 0.5  # Parallelization for digitizing analog signal.
 mt_avg_trace = 2  # Parallelization for trace averaging.
 mp_rec_hits = 1.0  # Parallelization for hit reconstruction.
 ```

 %% Cell type:code id: tags:

 ``` python
 from datetime import datetime
 from logging import warning
 from pathlib import Path
 import re

 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.colors import LogNorm
 from threadpoolctl import threadpool_limits

 import h5py

 import pasha as psh
 from euxfel_bunch_pattern import indices_at_sase, indices_at_laser
 from extra_data import RunDirectory, by_id
 from extra_remi import Analysis, trigger_dt
 from extra_remi.util import timing
 from extra_remi.rd_resort import signal_dt, hit_dt
 from extra_remi.files import DataFile, sequence_pulses

 if quad_anode:
    from extra_remi.plots import plot_detector_diagnostics_quad as plot_detector_diagnostics
 else:
    from extra_remi.plots import plot_detector_diagnostics_hex as plot_detector_diagnostics

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 def finite_flattened_slice(array, slice_=np.s_[:]):
    """Select ``array[slice_]`` and keep only its finite entries.

    NaN and infinite values are removed by boolean masking, so the
    result is always one-dimensional.
    """
    selection = array[slice_]
    finite_mask = np.isfinite(selection)
    return selection[finite_mask]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Resolve the configuration file: first as given, then relative to in_folder.
 calib_config_path = Path(calib_config_path)

 if not calib_config_path.is_file():
    # If the path cannot be resolved right now, try the same path relative to in_folder.
    calib_config_path = Path(in_folder) / calib_config_path

    if not calib_config_path.is_file():
        # Disallow implicit config file creation.
        raise ValueError('calib_config_path not found - neither absolute nor relative to in_folder')

 # Analysis object built from the configuration file; hex anode handling unless quad_anode is set.
 remi = Analysis(calib_config_path, use_hex=not quad_anode)

 with timing('open_run'):
    # Open the run directory `r<run>` below in_folder, including suspect trains. A PPT is only
    # required from the data when it is not going to be reconstructed from trigger edges.
    dc = remi.prepare_dc(RunDirectory(Path(in_folder) / f'r{run:04d}', inc_suspect_trains=True),
                         require_ppt=not reconstruct_ppt)
 ```

 %% Cell type:markdown id: tags:

 # Transformation parameters

 %% Cell type:code id: tags:

 ``` python
 def print_leaf(leaf, indent=0):
    """Print a nested mapping as a tree, four spaces of indentation per level.

    Dictionary values are printed as a heading line followed by their own
    entries one level deeper; any other value is printed as `key: value`.
    """
    prefix = ' ' * (4 * indent)

    for key, value in leaf.items():
        if not isinstance(value, dict):
            print(prefix + f'{key}: {value}')
            continue

        print(prefix + key)
        print_leaf(value, indent=indent + 1)

 print_leaf(remi.tree)
 ```

 %% Cell type:markdown id: tags:

 # Pulse and trigger information

 %% Cell type:markdown id: tags:

 ### Read PPT from file or reconstruct PPT for older data

 %% Cell type:code id: tags:

 ``` python
 if reconstruct_ppt:
    # No recorded PPT is used: a constant fake PPT is built from the edges found on the
    # averaged trace of `trigger_edge_channel` and served for every train.

    # Take up to the first hundred trains for now.
    # Could be done for each train individually, but likely not necessary for now.
    trigger_trace = dc[remi['digitizer']['source'], remi['digitizer']['key_pattern'].format(trigger_edge_channel)] \
        [:100].ndarray().mean(axis=0).astype(np.float64)
    trigger_trace -= trigger_trace[0]  # Use simple offset correction.

    fake_ppt = np.zeros(2700, dtype=np.uint32)

    discr_func, discr_params = remi.get_discriminator([trigger_edge_channel])

    edges = np.zeros(1000, dtype=np.float64)
    num_pulses = discr_func(trigger_trace, edges=edges, **discr_params[0])
    edges = edges[:num_pulses]

    if len(edges) < 2:
        # The pulse spacing is derived from the distance between the first two edges, so
        # fail with a clear message instead of an IndexError further down.
        raise ValueError('PPT reconstruction requires at least two trigger edges')

    first_edge = edges[0]
    rel_edges = np.round(edges - first_edge)
    edge_diff = rel_edges[1] - rel_edges[0]

    if not np.allclose(rel_edges[1:] - rel_edges[:-1], edge_diff):
        raise ValueError('PPT reconstruction for unstable edge intervals not supported')

    pulse_spacing = edge_diff / (2 * remi['digitizer']['clock_factor'])  # In units of PPT

    if not float.is_integer(pulse_spacing):
        raise ValueError('PPT reconstruction encountered non-integer pulse spacing')

    pulse_spacing = int(pulse_spacing)

    # Taken from euxfel_bunch_pattern/__init__.py
    from euxfel_bunch_pattern import DESTINATION_T4D, DESTINATION_T5D, PHOTON_LINE_DEFLECTION
    if instrument_sase == 1:
        flag = DESTINATION_T4D
    elif instrument_sase == 2:
        flag = DESTINATION_T5D
    elif instrument_sase == 3:
        flag = DESTINATION_T4D | PHOTON_LINE_DEFLECTION
    else:
        # Without this branch `flag` stays undefined and only fails later with a NameError.
        raise ValueError(f'Unsupported instrument_sase `{instrument_sase}`, expected 1, 2 or 3')

    first_pulse_offset = int(first_edge + trigger_edge_offset)  # Overwrite notebook argument.
    fake_ppt[fake_ppt_offset:fake_ppt_offset + (pulse_spacing * num_pulses):pulse_spacing] = flag

    from pasha.functor import Functor, gen_split_slices
    class FakeKeyDataFunctor(Functor):
        """Functor appearing KeyData-like with constant data.

        This functor serves a constant data row for a given number
        of train IDs the same way a KeyData object would.
        """

        def __init__(self, row, train_ids):
            self.row = row
            self.train_ids = train_ids

        def split(self, num_workers):
            # One slice of train indices per worker.
            return gen_split_slices(len(self.train_ids), n_parts=num_workers)

        def iterate(self, share):
            # Pair each index of this worker's slice with the train ID at the same position.
            it = zip(range(*share.indices(len(self.train_ids))), self.train_ids)

            for index, train_id in it:
                yield index, train_id, self.row

    ppt_data = FakeKeyDataFunctor(fake_ppt, dc.train_ids)

    fig, ax = plt.subplots(num=99, figsize=(9, 6), clear=True, ncols=1, nrows=1)

    ax.set_title('Edge trigger signal')
    ax.plot(trigger_trace, lw=1, label=f'Mean {trigger_edge_channel} trace')
    ax.vlines(edges, trigger_trace.min()*1.1, trigger_trace.max()*1.1,
              color='red', linewidth=3, alpha=0.3, label='Edge positions')

    ax.set_xlabel('Samples')
    ax.set_ylabel('Intensity / ADU')
    ax.legend()

 else:
    # Use the recorded pulse pattern table as is.
    ppt_data = dc[ppt_source, 'data.bunchPatternTable']
 ```

 %% Cell type:markdown id: tags:

 ### Count pulses per train and compute offsets

 %% Cell type:code id: tags:

 ``` python
 # Based on the pulse pattern tables, three global variables are obtained:
 #
 # * `pulse_counts [int32: len(dc.train_ids)]` containing the number of pulses per train.
 # * `pulse_offsets [int32: len(dc.train_ids)]` containing the global offset for the first pulse of each train.
 # * `num_pulses = pulse_counts.sum(axis=0)`

 def get_pulse_positions(ppt, sase, laser, ppl_offset):
    """Collect FEL and PPL pulse positions from one pulse pattern table.

    The notebook-level switches `ignore_fel` and `ignore_ppl` replace the
    respective positions by an empty array. When FEL pulses are present,
    the PPL positions are shifted in place by the first FEL position plus
    `ppl_offset`.

    Returns the sorted union of both position sets, followed by the FEL
    positions and the (possibly shifted) PPL positions.
    """
    if ignore_fel:
        fel_pos = np.array([])
    else:
        fel_pos = indices_at_sase(ppt, sase)

    if ignore_ppl:
        ppl_pos = np.array([])
    else:
        ppl_pos = indices_at_laser(ppt, laser)

    if len(fel_pos) > 0:
        # Move PPL up to the FEL position.
        ppl_pos += fel_pos[0] + ppl_offset

    combined_pos = np.union1d(fel_pos, ppl_pos)
    return combined_pos, fel_pos, ppl_pos

 if laser_ppt_mask < 0:
    # If laser PPT mask is not specified, try to figure it out from device IDs.
    from euxfel_bunch_pattern import PPL_BITS

    # The instrument name is everything before the first underscore of karabo_id.
    instrument = karabo_id[:karabo_id.index('_')]

    try:
        # The mask is looked up under the key `LP_<instrument>`; this overwrites the notebook argument.
        laser_ppt_mask = PPL_BITS[f'LP_{instrument}']
    except KeyError:
        raise ValueError(f'Laser PPT mask unknown for instrument `{instrument}`')

 with timing('pulse_info'):
    psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))

    # Per-train buffers filled by the workers: the pulse count and a marker
    # telling whether a PPT entry was seen for the train at all.
    pulse_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint64)
    has_ppt = psh.alloc(shape=len(dc.train_ids), dtype=bool, fill=False)

    def count_pulses(wid, index, tid, ppt):
        positions, _, _ = get_pulse_positions(ppt, instrument_sase, laser_ppt_mask, ppl_offset)
        pulse_counts[index] = len(positions)
        has_ppt[index] = True

    psh.map(count_pulses, ppt_data)

    # Trains without a PPT entry get the highest pulse count found.
    pulse_counts[~has_ppt] = pulse_counts.max()

    # Offsets are the exclusive running sum of the pulse counts.
    pulse_offsets = np.zeros_like(pulse_counts)
    pulse_offsets[1:] = np.cumsum(pulse_counts[:-1])

    # Total number of pulses across all trains.
    num_pulses = int(pulse_counts.sum())
 ```

 %% Cell type:code id: tags:

 ``` python
 # Plot the number of pulses per train over the train ID.
 fig, ax = plt.subplots(num=1, ncols=1, nrows=1, figsize=(9, 4), clear=True)

 ax.set_title('Pulse count')
 ax.plot(dc.train_ids, pulse_counts, lw=1)
 ax.set_xlabel('Train ID')
 ax.set_ylabel('Number of pulses')
 # Show at least the range up to 300 pulses, with some headroom for higher counts.
 ax.set_ylim(0, max(300, pulse_counts.max() + 10))
 # Plain tick labels, i.e. no scientific notation or offset for the train IDs.
 ax.ticklabel_format(style='plain')
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Find triggers

 The trigger defines the boundary of a pulse on the digitizer trace, which is stored per train.

 %% Cell type:code id: tags:

 ``` python
 # A trigger defines the boundary of a pulse on the digitizer trace stored per train. This cell creates a
 # global variable:
 # * `triggers [(start: int32, stop: int32, offset: float64, pulse, fel: bool, ppl: bool): num_pulses]`
 #   containing the triggers for each pulse, with `pulse` holding the pulse position in the PPT.
 #
 # This uses the pulse pattern table to locate the pulse positions on the trace. Only number of pulses and
 # their distance can be drawn this way, leaving the absolute offset for the very first pulse to be
 # configured via `trigger/ppt/first_pulse_offset`. If a PPL is used, it will be included in the trigger
 # pattern. The `ppl_offset` parameter allows taking into account an offset between PPL and FEL.

 psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))

 # One entry per pulse, pre-filled with placeholder values until a worker writes the real trigger.
 triggers = psh.alloc(shape=(num_pulses,), dtype=trigger_dt, fill=(-1, -1, np.nan, -1, 0, 0))

 clock_factor = remi['digitizer']['clock_factor']

 def trigger_by_ppt(worker_id, index, train_id, ppt):
    """Fill the slice of the global `triggers` array belonging to one train.

    Pulse positions are obtained from the train's pulse pattern table via
    get_pulse_positions() and converted to sample positions on the trace
    using `first_pulse_offset` and `clock_factor`. Trains without any
    pulse are left untouched.

    The signature is the kernel signature used with `psh.map`; `worker_id`
    and `train_id` are not used.
    """
    all_pos, fel_pos, ppl_pos = get_pulse_positions(ppt, instrument_sase, laser_ppt_mask, ppl_offset)
    n_pos = len(all_pos)

    if n_pos == 0:
        return

    if len(ppl_pos) == 0 and ppl_offset < 0:
        # No PPL pulses, but a negative offset is configured. This will cause
        # first_pulse_offset to start early and most likely miss pulses at the
        # end, so we correct by adding the ppl_offset to relative positions
        # when computing trace positions.
        pos_corr = abs(ppl_offset)
    else:
        pos_corr = 0

    rel_pos = all_pos - all_pos[0]

    if n_pos > 1:
        # The shortest distance between neighbouring pulses defines the pulse length.
        pulse_len = (rel_pos[1:] - rel_pos[:-1]).min()
    else:
        pulse_len = single_pulse_length

    # Fractional start position in samples; the integer part becomes the trigger
    # start and the remainder is kept as the trigger offset.
    start_frac = first_pulse_offset + (rel_pos + pos_corr) * 2 * clock_factor
    start_int = start_frac.astype(int)

    pulse_offset = pulse_offsets[index]
    pulse_count = pulse_counts[index]

    train_triggers = triggers[pulse_offset:pulse_offset+pulse_count]
    train_triggers['start'] = start_int + pulse_start_offset
    train_triggers['stop'] = start_int + int(pulse_len * 2 * clock_factor) - 1 + pulse_end_offset
    train_triggers['offset'] = start_frac - start_int
    train_triggers['pulse'] = all_pos.astype(np.int16)
    # Vectorized membership test instead of one Python-level array scan per pulse.
    train_triggers['fel'] = np.isin(all_pos, fel_pos)
    train_triggers['ppl'] = np.isin(all_pos, ppl_pos)


 if ignore_fel and ignore_ppl:
    # Both FEL and PPL are ignored, use virtual full train triggers.
    print('WARNING: Both FEL and PPL pulses are ignored, '
          'virtual trigger is inserted covering the entire train')

    # Overwrite global pulse statistics computed before,
    # with exactly one virtual pulse per train.
    num_pulses = len(dc.train_ids)
    triggers = np.empty(num_pulses, dtype=trigger_dt)

    pulse_counts[:] = 1
    pulse_counts = pulse_counts.astype(np.int32)
    pulse_offsets = np.arange(len(pulse_counts)).astype(np.int32)

    # Obtain minimal trace length.
    min_trace_len = min([
        dc[src, key].entry_shape[0]
        for det_name in remi['detector'].keys()
        for src, key in remi.get_detector_sourcekeys(det_name)
    ])

    # Every virtual trigger spans from first_pulse_offset to the end of the shortest trace.
    triggers['start'] = first_pulse_offset
    triggers['stop'] = min_trace_len
    triggers['offset'] = 0.0
    triggers['pulse'] = -1
    triggers['fel'] = False
    triggers['ppl'] = False

 else:
    with timing('find_triggers'):
        psh.map(trigger_by_ppt, ppt_data)

    # More than one distinct positive difference between consecutive PPT positions?
    if (np.unique(triggers['pulse'][1:] - triggers['pulse'][:-1]) > 0).sum() > 1:
        # There is more than one delta between pulse entries across all pulses. This is not
        # necessarily a problem, as the pattern could simply have changed in between trains
        # with each train being split properly.
        # If there's more than one delta in a single train, this likely points to a mismatch
        # of FEL and PPL repetition rate. This is most likely not intended.

        one = np.uint64(1)  # Because np.uint64 + int = np.float64
        pulse_deltas = set()

        # NOTE(review): the enumerate index counts trains, not pulses, and is not used.
        for pulse_id, (offset, count) in enumerate(zip(pulse_offsets, pulse_counts)):
            # Differences between neighbouring PPT positions within this train.
            deltas = triggers['pulse'][offset+one:offset+count] - triggers['pulse'][offset:offset+count-one]

            # Only trains with more than one distinct delta contribute their deltas.
            if len(np.unique(deltas)) > 1:
                for delta in deltas:
                    pulse_deltas.add(delta)

        if len(pulse_deltas) > 1:
            delta_str = ', '.join([str(x) for x in sorted(pulse_deltas)])
            warning(f'Different pulse lengths (PPT: {delta_str}) encountered within single trains, '
                    f'separated pulse spectra may split up signals!')
        else:
            warning('Different pulse lengths encountered across trains, separation may be unstable!')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Left: trigger start positions for the first few trains, split by pulse type.
 # Right: trigger start positions of all pulses.
 fig, (lx, rx) = plt.subplots(num=2, ncols=2, nrows=1, figsize=(9, 4), clear=True,
                             gridspec_kw=dict(top=0.75))

 # Display the trains up to the one whose pulse offset is closest to 200, but at least 5 trains.
 # NOTE(review): pulse_offsets[n_trains] below assumes the run holds more than n_trains trains.
 n_trains = max(abs(pulse_offsets - 200).argmin(), 5)

 visible_triggers = triggers[:pulse_offsets[n_trains]]

 # Classify the visible pulses into FEL+PPL, FEL-only and PPL-only.
 pulse_index = np.arange(len(visible_triggers))
 pumped = visible_triggers['fel'] & visible_triggers['ppl']
 fel_only = visible_triggers['fel'] & ~pumped
 ppl_only = visible_triggers['ppl'] & ~pumped

 lx.plot(pulse_index[pumped], visible_triggers[pumped]['start'], ' .', ms=3, c='C0', label='FEL+PPL')
 lx.plot(pulse_index[fel_only], visible_triggers[fel_only]['start'], '.', ms=3, c='C1', label='FEL-only')
 lx.plot(pulse_index[ppl_only], visible_triggers[ppl_only]['start'], '.', ms=2, c='C2', label='PPL-only')

 max_start = visible_triggers['start'].max()

 # Vertical lines mark the first pulse of each displayed train.
 lx.vlines(pulse_offsets[:n_trains], 0, max_start, color='grey', linewidth=1, alpha=0.2)
 lx.tick_params(right=True)

 lx.set_xlabel('Pulse index')
 lx.set_xlim(-15, pulse_offsets[n_trains]+15)

 lx.set_ylabel('Trigger position')
 lx.set_ylim(-max_start // 20, max_start + max_start // 20)

 lx.legend(fontsize='small', loc='lower right')

 # Secondary x axis labelling the train boundaries with their train IDs.
 train_lx = lx.twiny()
 train_lx.set_xlabel('Train ID', labelpad=8)
 train_lx.set_xlim(lx.get_xlim())
 train_lx.set_xticks(pulse_offsets[:n_trains])
 train_lx.set_xticklabels([str(int(x)) for x in dc.train_ids[:n_trains]],
                         rotation=-45, fontsize='x-small')

 rx.plot(triggers['start'], lw=0.2)

 rx.set_xlabel('Pulse index')
 rx.tick_params(left=False, labelleft=False, right=True, labelright=True)

 pass
 ```

 %% Cell type:markdown id: tags:

 # Analog signal to digital edges

 %% Cell type:markdown id: tags:

 ### Find edges in analog signal

 %% Cell type:code id: tags:

 ``` python
 psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_edges))
 threadpool_limits(limits=remi.get_num_workers(mt_avg_trace))

 det_data = {}

 for i, (det_name, det) in enumerate(remi['detector'].items()):
    det_sourcekeys = remi.get_detector_sourcekeys(det_name)
    det_get_traces = remi.get_traces_getter(det_name)
    trace_len = dc[next(iter(det_sourcekeys))].entry_shape[0]

    edges = psh.alloc(shape=(num_pulses, 7, det['max_hits']),
                      dtype=np.float64, fill=np.nan)
    amplitudes = psh.alloc(shape=(num_pulses, 7, det['max_hits']),
                           dtype=np.float64, fill=np.nan)
    avg_traces = psh.alloc_per_worker(shape=(7, trace_len), dtype=np.float64)

    def prepare_edge_worker(worker_id):
        """Set up the per-worker state for find_edges(), then yield.

        NOTE(review): the names bound here are used by find_edges() without
        being passed explicitly; presumably `psh.with_init` makes them
        visible to the decorated kernel - confirm against the pasha docs.
        """
        # Callables for baseline correction and edge discrimination, the latter
        # with one parameter set per channel of the current detector.
        correct_func = remi.get_baseline_corrector()
        discr_func, discr_params = remi.get_discriminator(det['channels'])

        source_name = remi['digitizer']['source']
        bl_start, bl_stop, _ = remi.get_baseline_limits(trace_len)
        bl_sym = remi['digitizer']['baseline_symmetry']
        time_cal = remi.get_time_calibration()  # NOTE(review): not used by find_edges().

        # Scratch buffers for the corrected traces and the baselines.
        traces_corr = np.empty((7, trace_len), dtype=np.float64)
        baselines = np.empty(bl_sym, dtype=np.float64)
        yield

    @psh.with_init(prepare_edge_worker)
    def find_edges(worker_id, index, train_id, data):
        """Digitize the analog traces of one train into edges and amplitudes.

        The seven channel traces are baseline-corrected, added to this
        worker's running trace sum and then searched for edges within each
        trigger window of the train. Results are written into the shared
        `edges` and `amplitudes` arrays. Trains for which the digitizer
        source is missing from `data` are skipped.
        """
        try:
            data = det_get_traces(data[source_name])
        except KeyError:
            return

        # Baseline correction, channel by channel, into the scratch buffer.
        for channel_idx in range(7):
            correct_func(data[channel_idx], traces_corr[channel_idx],
                         baselines, bl_start, bl_stop)

        # Accumulated per worker; summed and normalized after the map call.
        avg_traces[worker_id] += traces_corr

        # Rows of the per-pulse arrays that belong to this train.
        pulses_slice = np.s_[pulse_offsets[index]:pulse_offsets[index]+pulse_counts[index]]

        for trigger, pulse_edges, pulse_amplitudes in zip(
            triggers[pulses_slice], edges[pulses_slice], amplitudes[pulses_slice]
        ):
            trigger_slice = np.s_[trigger['start']:trigger['stop']]

            # Run the discriminator on each channel's window, writing into the output rows.
            for trace, channel_params, channel_edges, channel_amplitudes in zip(
                traces_corr, discr_params, pulse_edges, pulse_amplitudes
            ):
                discr_func(trace[trigger_slice], edges=channel_edges,
                           amplitudes=channel_amplitudes, **channel_params)

    with timing(f'find_edges, {det_name}'):
        psh.map(find_edges, dc.select(det_sourcekeys))

    if not np.isfinite(edges).any():
        warning(f'No edges found for {det_name}')

    fig, (ux, bx) = plt.subplots(num=110+i, ncols=1, nrows=2, figsize=(9.5, 8), clear=True,
                                 gridspec_kw=dict(left=0.1, right=0.98, top=0.98, bottom=0.1, hspace=0.25))

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        ux.hist(finite_flattened_slice(amplitudes, np.s_[:, edge_idx, :]),
                bins=1000, range=(0, 2048), histtype='step', lw=1,
                color=f'C{edge_idx}' if edge_idx < 6 else 'k', label=edge_name)

        cur_edges = finite_flattened_slice(edges, np.s_[:, edge_idx, :])
        bx.hist(cur_edges - np.floor(cur_edges), bins=500, range=(0, 1), histtype='step',
                lw=1, color=f'C{edge_idx}' if edge_idx < 6 else 'k', label=edge_name)

    ux.legend()
    ux.set_title('Pulse height distributions')
    ux.set_xlabel('Pulse height')
    ux.set_yscale('log')
    ux.set_xlim(0, 2048)
-    ux.set_ylim(10, 1.5*ux.get_xlim()[1])
+    ux.set_ylim(10, 1.5*ux.get_ylim()[1])

    bx.set_title('Fractional edge distributions')
    bx.set_xlabel('Edge positions - ⌊edge positions⌋')
    bx.set_yscale('log')
    bx.set_xlim(-0.05, 1.2)
    bx.legend()

    # Properly offset edges to their trigger offset and convert to time.
    # This is not done earlier to preserve the information for plotting.
    edges += triggers['offset'][:, None, None]
    edges *= remi.get_time_calibration()

    det_data[det_name] = {
        'edges': edges,
        'amplitudes': amplitudes,
        'avg_trace': avg_traces.sum(axis=0) / len(dc.train_ids)
    }
 ```

 %% Cell type:markdown id: tags:

 ### Global average of analog signals

 %% Cell type:code id: tags:

 ``` python
 # One figure per detector with the averaged analog trace of each of the seven channels.
 for i, det_name in enumerate(remi['detector'].keys()):
    fig, axs = plt.subplots(
        num=10+i, nrows=7, figsize=(9.5, 8), clear=True,
        gridspec_kw=dict(left=0.1, right=0.98, top=0.98, bottom=0.1),
    )
    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        ax = axs[edge_idx]
        ax.plot(det_data[det_name]['avg_trace'][edge_idx], lw=1)
        ax.tick_params(labelbottom=False)
        ax.set_ylabel(edge_name)

    # Only the bottom axis keeps its x tick labels.
    axs[-1].tick_params(labelbottom=True)
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Sample for found edges

 %% Cell type:code id: tags:

 ``` python
 # For each detector and channel, show the trace of the first pulse in which an edge was
 # found, together with the edge positions found on it.
 for i, det_name in enumerate(remi['detector'].keys()):
    edges = det_data[det_name]['edges']

    fig = plt.figure(num=100+i, figsize=(9.5, 8))
    grid = fig.add_gridspec(ncols=2, nrows=4, left=0.1, right=0.98, top=0.98, bottom=0.1)

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    for signal_idx, signal_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        # The six anode channels fill rows 1-3 in two columns, mcp spans the whole top row.
        row = (1 + signal_idx // 2) if signal_idx < 6 else 0
        col = (signal_idx % 2) if signal_idx < 6 else np.s_[:]
        ax = fig.add_subplot(grid[row, col])

        # Pulses whose first edge slot on this channel is filled.
        finite_edges = np.isfinite(edges[:, signal_idx, 0])
        if not finite_edges.any():
            warning(f'No edges found for {det_name}/{signal_name}')
            continue

        pulse_idx = np.uint64(finite_edges.nonzero()[0][0])  # Is combined with other uint64 values below.
        # Last train whose pulse offset does not exceed the pulse index.
        train_idx = (pulse_idx >= pulse_offsets).nonzero()[0][-1]
        trigger = triggers[pulse_idx]

        sourcekey = remi.get_channel_sourcekey(
            remi['detector'][det_name]['channels'][signal_idx])
        train_trace = dc[sourcekey].select_trains(np.s_[train_idx:train_idx+1]).ndarray()[0]
        corr_trace = np.zeros_like(train_trace, dtype=np.float64)

        # Baseline-correct the raw trace into corr_trace.
        remi.get_baseline_corrector()(
            train_trace, corr_trace,
            np.empty(remi['digitizer']['baseline_symmetry'], dtype=np.float64),
            *remi.get_baseline_limits(len(train_trace))[:2])

        pulse_trace = corr_trace[np.s_[trigger['start']:trigger['stop']]]

        # Time axis: sample index shifted by the fractional trigger offset, scaled by the time calibration.
        x_time = remi.get_time_calibration() * (np.arange(len(pulse_trace)) + trigger['offset'])

        ax.plot(x_time, pulse_trace, lw=1)
        ax.set_xlim(x_time[0], x_time[-1])
        ax.set_ylim(-200, pulse_trace.max()*1.1)
        # Label with the train index and the pulse index within that train.
        ax.text(x_time[-1], pulse_trace.max(),
                f'T{train_idx} P{pulse_idx - pulse_offsets[train_idx]} ',
                va='top', ha='right')
        ax.tick_params(labelbottom=False)
        ax.set_ylabel(signal_name)

        ax.vlines(edges[pulse_idx, signal_idx, :], *ax.get_ylim(), color='red', linewidth=1)
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Digitized channel spectra

 %% Cell type:code id: tags:

 ``` python
 # One figure per detector: the mean number of edges per pulse over the run (top left) and
 # a histogram of the edge times for each of the seven channels.
 for i, det_name in enumerate(remi['detector'].keys()):
    fig = plt.figure(num=20+i, figsize=(9.5, 6))

    edges = det_data[det_name]['edges']

    min_edge = np.nanmin(edges)
    max_edge = np.nanmax(edges)

    grid = fig.add_gridspec(ncols=3, nrows=3, left=0.08, right=0.98, top=0.95, hspace=0.4)

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    numx = fig.add_subplot(grid[0, 0])
    numx.set_title('Edges per pulse')

    # Edge counts are averaged over windows of pulses. The window must be at least one
    # pulse wide, as runs with fewer than 60 pulses otherwise divide by zero below.
    agg_window = max(num_pulses // 60, 1)
    max_num_edges = 0.0
    max_spectral_intensity = 0
    hist_axs = []

    for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
        # Anode channels fill the lower two rows column by column, mcp sits in the top row.
        if edge_idx < 6:
            row = 1 + edge_idx % 2
            col = edge_idx // 2
        else:
            row = 0
            col = np.s_[1:3]

        ax = fig.add_subplot(grid[row, col])
        ax.set_title(f'TOF spectrum: {edge_name}')

        # Number of edges per pulse, truncated to a multiple of the window and averaged per window.
        num_edges = np.isfinite(edges[:, edge_idx, :]).sum(axis=1)
        num_edges = num_edges[:((len(num_edges) // agg_window) * agg_window)]
        num_edges = num_edges.reshape(-1, agg_window).mean(axis=1)

        if (num_edges == 0).all():
            warning(f'No edges found for {det_name}/{edge_name}')
            continue

        if edge_idx < 6:
            plot_kwargs = dict(c=f'C{edge_idx}', ls='solid', lw=1.0)
        else:
            plot_kwargs = dict(c='k', ls='dashed', lw=1.0)

        numx.plot(np.arange(len(num_edges)) * agg_window, num_edges, label=edge_name, **plot_kwargs)
        max_num_edges = max(max_num_edges, num_edges.max())

        # Bins are 5 units of edge time wide; keep at least one bin for very narrow ranges.
        y, _, _ = ax.hist(finite_flattened_slice(edges, np.s_[:, edge_idx, :]),
                          bins=max(int((max_edge - min_edge) // 5), 1), range=(min_edge, max_edge),
                          color=plot_kwargs['c'], histtype='step', linewidth=1)
        hist_axs.append(ax)

        max_spectral_intensity = max(max_spectral_intensity, y.max())

    numx.tick_params(labelbottom=False)
    numx.set_ylim(0, 1.2*max_num_edges)

    # Use a common intensity scale for all spectra of this detector.
    for ax in hist_axs:
        ax.set_ylim(0, max_spectral_intensity*1.1)
        ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3))
 pass
 ```

 %% Cell type:markdown id: tags:

 # Detector diagnostics

 %% Cell type:code id: tags:

 ``` python
 # Per detector: diagnostics plot of signals and time sums before corrections and, if the
 # configuration enables sum or position correction, a second plot after corrections.
 for i, det_name in enumerate(remi['detector'].keys()):
    edges = det_data[det_name]['edges']

    sort = remi.get_dld_sorter(det_name)

    # Sum shifts are only passed on when at least one of them is non-zero.
    sum_shifts = sort.sum_shifts if sort.sum_shifts != (0.0, 0.0, 0.0) else None

    # Preliminary mask of usable edge combinations; the w layer is only considered for hex anodes.
    is_valid = remi.get_presort_mask(edges, edge_idx=0, w=not quad_anode,
                                     sum_limit=max(sort.uncorrected_time_sum_half_widths),
                                     sum_shifts=sum_shifts)

    if not is_valid.any():
        warning(f'No valid preliminary edge combinations found for {det_name}')

    signals, sums = remi.get_signals_and_sums(edges, indices=sort.channel_indices, sum_shifts=sum_shifts,
                                              mask=is_valid)
    fig = plot_detector_diagnostics(signals=signals, sums=sums, fig_num=30+i, im_scale=1.5,
                                    sum_range=max(sort.uncorrected_time_sum_half_widths),
                                    sorter=sort)
    fig.text(0.02, 0.98, det_name.upper() + ' before corrections', rotation=90, ha='left', va='top', size='x-large')

    if remi['detector'][det_name]['use_sum_correction'] or remi['detector'][det_name]['use_pos_correction']:
        # Output buffers with one row per valid edge combination, filled by sort.correct().
        n_masked = is_valid.sum()
        signals = np.full((n_masked, 3), np.nan, dtype=np.float64)
        sums = np.full((n_masked, 3), np.nan, dtype=np.float64)

        sort.correct(edges[is_valid], signals, sums)
        fig = plot_detector_diagnostics(signals=signals, sums=sums, fig_num=40+i, im_scale=1.5,
                                        sum_range=max(sort.uncorrected_time_sum_half_widths),
                                        sorter=sort)
        fig.text(0.02, 0.98, det_name.upper() + ' after corrections', rotation=90, ha='left', va='top', size='x-large')
 pass
 ```

 %% Cell type:markdown id: tags:

 # Hit reconstruction

 %% Cell type:code id: tags:

 ``` python
 psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_rec_hits))

 # Reconstruct hits per detector, train by train, from the edges found before.
 for det_name, det in remi['detector'].items():
    edges = det_data[det_name]['edges']

    # Per-pulse output buffers with room for up to 50 signals and hits each,
    # plus one hit counter per train.
    signals = psh.alloc(shape=(num_pulses, 50), dtype=signal_dt, fill=np.nan)
    hits = psh.alloc(shape=(num_pulses, 50), dtype=hit_dt, fill=(np.nan, np.nan, np.nan, -1))
    hit_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint32)

    def prepare_hit_worker(worker_id):
        # Each worker creates its own sorter for this detector.
        # NOTE(review): `sort` is used by reconstruct_hits() without being passed explicitly;
        # presumably `psh.with_init` makes it visible to the kernel - confirm against the pasha docs.
        sort = remi.get_dld_sorter(det_name)
        yield

    @psh.with_init(prepare_hit_worker)
    def reconstruct_hits(worker_id, index, train_id):
        # Run the sorter on the pulse range of this train and add its return value to the train's counter.
        hit_counts[index] += sort.run_on_train(
            edges, signals, hits, pulse_offsets[index], pulse_offsets[index] + pulse_counts[index])

    with timing(f'rec_hits, {det_name}'):
        psh.map(reconstruct_hits, dc.train_ids)

    det_data[det_name].update(signals=signals, hits=hits, hit_counts=hit_counts)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Mean number of reconstructed hits per pulse over the run, one line per detector.
 # The figure number is fixed: this single plot combines all detectors and must not depend
 # on a loop variable left over from an earlier cell.
 fig, ax = plt.subplots(num=50, figsize=(9.5, 4), ncols=1, clear=True,
                       gridspec_kw=dict(top=0.92, right=0.98, left=0.05, bottom=0.12))

 max_num_hits = 0.0

 # Hit counts are averaged over windows of num_pulses // 1000 pulses, but at least one pulse.
 # This is the same for all detectors and avoids a division by zero for an empty run.
 agg_window = max(num_pulses // 1000, 1)

 for det_name in remi['detector'].keys():
    # Number of hits per pulse, truncated to a multiple of the window and averaged per window.
    num_hits = np.isfinite(det_data[det_name]['hits']['x']).sum(axis=1)
    num_hits = num_hits[:(len(num_hits) // agg_window) * agg_window]
    num_hits = num_hits.reshape(-1, agg_window).mean(axis=1)
    max_num_hits = max(max_num_hits, num_hits.max())

    ax.plot(np.arange(0, (num_pulses // agg_window) * agg_window, agg_window), num_hits,
            lw=1, label=det_name.upper())

 ax.set_title('Hits per pulse')
 ax.set_xlabel('Pulse index')
 ax.set_ylim(0, max_num_hits*1.1)
 ax.legend()
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Reconstruction methods
 Each hit may be reconstructed by one of 20 different methods. These differ by the number of real signals across the channels, which could be combined to form the hit. Each of these methods is designated by a number between `0` and `19` (with empty hits using `-1`), which can be found in the `m` key of a hit, e.g.:

 * `0`: All six anode signals and the corresponding MCP signal were found.
 * `4`: One signal on layer `u` is missing, all other signals for this event were found.
 * `18`: Only one anode signal on each layer was found and the MCP signal is missing. There is no way to check whether this combination of signals is actually valid.

 | Method | `u+v+w +mcp` |
 | - | - |
 | 0 | `2+2+2 +1` |
 | 1 | `0+2+2 +1` |
 | 2 | `2+0+2 +1` |
 | 3 | `2+2+0 +1` |
 | 4 | `1+2+2 +1` (2 permutations) |
 | 5 | `2+1+2 +1` (2 permutations) |
 | 6 | `2+2+1 +1` (2 permutations) |
 | 7 | `2+2+2 +0` |
 | 8 | `0+2+2 +0` |
 | 9 | `2+0+2 +0` |
 | 10 | `2+2+0 +0` |
 | 11 | `1+2+2 +0` (2 permutations) |
 | 12 | `2+1+2 +0` (2 permutations) |
 | 13 | `2+2+1 +0` (2 permutations) |
 | 14 | `2+1+1 +1` `1+2+1 +1` `1+1+2 +1` (12 permutations) |
 | 15 | `2+1+0 +1` `2+0+1 +1` `1+2+0 +1` `1+0+2 +1` `0+2+1 +1` `0+1+2 +1` (12 permutations) |
 | 16 | `1+1+1 +1` (8 permutations) |
 | 17 | `2+1+1 +0` `1+2+1 +0` `1+1+2 +0` (12 permutations) |
 | 18 | `1+1+1 +0` (8 permutations) |
 | 19 | `2+1+0 +0` `2+0+1 +0` `1+2+0 +0` `1+0+2 +0` `0+1+2 +0` `0+2+1 +0` (12 permutations) |

 * For hits reconstructed with method `> 10`, extra attention should be given to ensure they add meaningful signal.
 * Any method `> 14` has to be considered risky, because neither a time sum nor the position can be checked. If the scale factors and/or `w` shift are not correct, then the number of events reconstructed with the risky methods will increase. They will most likely be *ghost hits*, which do not correspond to actual impacts on the detector.

 %% Cell type:code id: tags:

 ``` python
 # One bar chart per detector showing how many hits were reconstructed with each method.
 for i, det_name in enumerate(remi['detector'].keys()):
    hits = det_data[det_name]['hits']

    fig, ax = plt.subplots(num=60+i, figsize=(9.5, 5), ncols=1, clear=True,
                           gridspec_kw=dict(left=0.08, right=0.91, top=0.8))

    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')

    # Method index -1 marks empty hits; skip the detector if nothing was reconstructed.
    if not (hits['m'] >= 0).any():
        warning(f'No hits found for {det_name}')
        continue

    # Count hits per method index 0..19.
    method_bins = np.bincount(hits['m'][hits['m'] >= 0], minlength=20)
    ax.bar(np.arange(20), method_bins, width=0.5)

    ax.set_xlabel('Reconstruction method')
    ax.set_xlim(-0.5, 19.5)
    ax.set_xticks(np.arange(20))

    ax.set_ylabel('Number of hits')
    ax.set_ylim(0, method_bins.max()*1.05)
    ylims = ax.get_ylim()

    ax.tick_params(which='both', right=True, labelright=True)

    # Methods 15..19 are the ones highlighted as risky in the plot.
    num_risky = method_bins[15:].sum()
    num_total = method_bins.sum()

    # Percentages of non-risky (black) and risky (red) hits, placed on either side of the boundary.
    ax.text(14.2, method_bins.max(), f'{(100*(num_total-num_risky)/num_total):.2g}%',
            va='top', ha='right', color='black')
    ax.text(14.8, method_bins.max(), f'{(100*num_risky/num_total):.2g}%',
            va='top', ha='left', color='red')

    # Shade the region covering the risky methods.
    ax.fill([14.5, 19.5, 19.5, 14.5], [ylims[0], ylims[0], ylims[1], ylims[1]], c='r', alpha=0.2)

    # Secondary x axis on top, labelling each method with its `u+v+w +mcp` signal combination.
    labelx = ax.twiny()
    labelx.set_xlim(*ax.get_xlim())
    labelx.set_xticks(ax.get_xticks())
    labelx.set_xticklabels([
        '2+2+2 +1',
        '0+2+2 +1', '2+0+2 +1', '2+2+0 +1',
        '1+2+2 +1', '2+1+2 +1', '2+2+1 +1',
        '2+2+2 +0',
        '0+2+2 +0', '2+0+2 +0', '2+2+0 +0', '1+2+2 +0', '2+1+2 +0', '2+2+1 +0',
        '2+1+1 +1',
        '2+1+0 +1',
        '1+1+1 +1',
        '2+1+1 +0',
        '1+1+1 +0',
        '2+1+0 +0',
    ], rotation=90)

    # NOTE(review): min_rel_tick is computed but not used further in this cell.
    min_rel_tick = np.ceil((ax.get_ylim()[0] / num_total) / 0.1) * 0.1
    max_rel_tick = np.floor((method_bins.max() / num_total) / 0.1) * 0.1

    # Secondary y axis with the same limits, ticked in percent of all hits
    # (major ticks every 10%, minor ticks every 2%).
    rely = ax.twinx()
    rely.set_ylim(*ax.get_ylim())
    rely.set_yticks(np.arange(0.0, max_rel_tick+0.01, 0.1)*num_total)
    rely.set_yticks(np.arange(0.0, ylims[1]/num_total, 0.02)*num_total, minor=True)
    rely.set_yticklabels([f'{(y/num_total)*100:.0f}%' for y in rely.get_yticks()])
    rely.set_ylabel('Percentage of total hits')
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Detector image and fishes

 %% Cell type:code id: tags:

 ``` python
 # Per detector: a 2D hit position image plus x-vs-time and y-vs-time histograms.
 for i, det_name in enumerate(remi['detector'].keys()):
    # Flatten all hits and keep only those with a finite x position and method index <= 10.
    flat_hits = det_data[det_name]['hits'].reshape(-1)
    flat_hits = flat_hits[np.isfinite(flat_hits[:]['x'])]
    flat_hits = flat_hits[flat_hits['m'] <= 10]

    fig = plt.figure(num=70+i, figsize=(9, 13.5))

    # Detector name in the top and bottom left corners of the figure.
    fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')
    fig.text(0.02, 0.02, det_name.upper(), rotation=90, ha='left', va='bottom', size='x-large')

    # Axes: position image on top, time-vs-x and time-vs-y histograms below.
    imp = fig.add_axes([0.1 + 0.25/2, 0.56, 0.6, 0.4])
    txp = fig.add_axes([0.1, 0.28, 0.85, 0.22])
    typ = fig.add_axes([0.1, 0.04, 0.85, 0.22])

    # The (empty) figure is still created above before the detector is skipped.
    if flat_hits.size == 0:
        warning(f'No hits found for {det_name}')
        continue

    # Plot range extends 10% beyond the configured MCP radius.
    im_radius = remi['detector'][det_name]['mcp_radius']*1.1

    imp.hist2d(flat_hits['x'], flat_hits['y'], bins=(256, 256),
               range=[[-im_radius, im_radius], [-im_radius, im_radius]], norm=LogNorm())
    imp.xaxis.set_label_position('top')
    imp.set_xlabel('X / mm')
    imp.set_ylabel('Y / mm')
    imp.tick_params(right=True, labelright=True, top=True, labeltop=True)
    imp.grid()

    min_tof = flat_hits['t'].min()
    max_tof = flat_hits['t'].max()

    # One time bin per 5 units of `t` (labelled as ns on the axis below).
    num_tof_bins = int((max_tof - min_tof) // 5)

    # A time span below one bin width cannot be histogrammed.
    if num_tof_bins == 0:
        warning(f'All TOFs limited to single bin for {det_name}')
        continue

    for ax, dim_label in zip([txp, typ], ['x', 'y']):
        ax.hist2d(flat_hits['t'], flat_hits[dim_label], bins=(num_tof_bins, 256),
                   range=[[min_tof, max_tof], [-im_radius, im_radius]], norm=LogNorm())
        ax.set_ylabel(f'{dim_label.upper()} / mm')

    typ.set_xlabel('Time-of-flight / ns')
    txp.tick_params(bottom=True, labelbottom=False, top=True, labeltop=True, right=True, labelright=True)
    typ.tick_params(right=True, labelright=True, top=True)
 pass
 ```

 %% Cell type:markdown id: tags:

 # Transformed data files

 %% Cell type:code id: tags:

 ``` python
 # Try to figure out proposal number from in_folder to work with older files.
 m = re.match(r'p(\d{6})', Path(in_folder).parts[-2])
 if not proposal and m is not None:
    proposal = int(m[1])

 # Number of trains per output sequence; falls back to the length of the first input file.
 seq_len = out_seq_len if out_seq_len > 0 else len(dc.files[0].train_ids)

 # Collect all `dataset_compression*` notebook parameters and strip the leading
 # `dataset_` (8 characters) to obtain the dataset creation keyword arguments.
 dataset_kwargs = {k[8:]: v for k, v in locals().items() if k.startswith('dataset_compression')}

 # One control source per detector.
 control_sources = [det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())
                   for det_name in remi['detector']]

 # Declare every channel for which at least one key is written below.
 # `raw.amplitudes` also lives in the `raw` channel, so it must be considered here too.
 channels = []
 if save_raw_triggers or save_raw_edges or save_raw_amplitudes:
    channels.append('raw')
 if save_rec_signals or save_rec_hits:
    channels.append('rec')

 instrument_channels = [
    f'{device_id}:{det_output_key}/{channel}'
    for device_id in control_sources
    for channel in channels
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Write the processed data into one output file per sequence.
 Path(out_folder).mkdir(parents=True, exist_ok=True)
 print('Writing sequence files', flush=True, end='')

 # The timing context is entered/exited manually to avoid indenting the whole cell.
 # NOTE(review): __exit__ is called without exception info below - confirm `timing`
 # supports that, otherwise wrap the loop in a `with` block.
 t_write = timing('write_files')
 t_write.__enter__()

 for seq_id, train_mask, pulse_mask in sequence_pulses(dc.train_ids, pulse_counts, pulse_offsets, seq_len):
    seq_train_ids = dc.train_ids[train_mask]

    with DataFile.from_details(out_folder, out_aggregator, run, seq_id) as outp:
        outp.create_metadata(like=dc, proposal=proposal, run=run, sequence=seq_id,
                             control_sources=control_sources, instrument_channels=instrument_channels)
        outp.create_index(
            seq_train_ids,
            timestamps=dc.select_trains(by_id[seq_train_ids]).train_timestamps().astype(np.uint64)
        )

        for det_name in remi['detector']:
            cur_device_id = det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())
            cur_max_hits = remi['detector'][det_name]['max_hits']

            cur_control_data = outp.create_control_source(cur_device_id)
            # Manually manipulate the file here, still creates the index properly.
            remi.attach_detector_config(det_name, cur_control_data.get_run_group())
            cur_control_data.create_index(len(seq_train_ids))

            cur_fast_data = outp.create_instrument_source(f'{cur_device_id}:{det_output_key}')

            cur_data = det_data[det_name]

            if save_raw_triggers:
                cur_fast_data.create_key('raw.triggers', triggers[pulse_mask],
                                         maxshape=(None,) + triggers.shape[1:],
                                         chunks=tuple(chunks_triggers), **dataset_kwargs)

            # For the per-detector keys, the last chunk dimension is clamped to this
            # detector's max_hits whenever the configured chunk size exceeds it.
            if save_raw_edges:
                cur_fast_data.create_key('raw.edges', cur_data['edges'][pulse_mask],
                                         maxshape=(None,) + cur_data['edges'].shape[1:],
                                         chunks=tuple(chunks_edges if chunks_edges[-1] <= cur_max_hits
                                                      else chunks_edges[:-1] + [cur_max_hits]),
                                         **dataset_kwargs)

            if save_raw_amplitudes:
                cur_fast_data.create_key('raw.amplitudes', cur_data['amplitudes'][pulse_mask],
                                         maxshape=(None,) + cur_data['amplitudes'].shape[1:],
                                         chunks=tuple(chunks_amplitudes if chunks_amplitudes[-1] <= cur_max_hits
                                                      else chunks_amplitudes[:-1] + [cur_max_hits]),
                                         **dataset_kwargs)

            if save_rec_signals:
                cur_fast_data.create_key('rec.signals', cur_data['signals'][pulse_mask],
                                         maxshape=(None,) + cur_data['signals'].shape[1:],
                                         chunks=tuple(chunks_signals if chunks_signals[-1] <= cur_max_hits
                                                      else chunks_signals[:-1] + [cur_max_hits]),
                                         **dataset_kwargs)

            if save_rec_hits:
                # Take the shape from this detector's own hits array (like the keys above),
                # not from the `hits` variable left over from an earlier plotting cell.
                cur_fast_data.create_key('rec.hits', cur_data['hits'][pulse_mask],
                                         maxshape=(None,) + cur_data['hits'].shape[1:],
                                         chunks=tuple(chunks_hits if chunks_hits[-1] <= cur_max_hits
                                                      else chunks_hits[:-1] + [cur_max_hits]),
                                         **dataset_kwargs)

            cur_fast_data.create_index(raw=pulse_counts[train_mask], rec=pulse_counts[train_mask])

    # Progress indicator: one dot per written sequence file.
    print('.', flush=True, end='')

 print('')
 t_write.__exit__()
 ```

--- a/notebooks/ePix100/Characterize_FlatFields_ePix100_NBC.ipynb
+++ b/notebooks/ePix100/Characterize_FlatFields_ePix100_NBC.ipynb
--- a/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb
+++ b/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb
 %% Cell type:code id: tags:

 ``` python
 # Author: European XFEL Detector Group, Version: 1.0

 #  Summary of the processed dark calibration constants and a comparison with previously injected constants.

 out_folder = "/gpfs/exfel/data/scratch/kluyvert/lpd-dark-p900320-r26_27_28" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 karabo_id = "FXE_DET_LPD1M-1" # detector instance
 gain_names = ['High gain', 'Medium gain', 'Low gain'] # a list of gain names to be used in plotting
 threshold_names = ['HG-MG threshold', 'MG_LG threshold'] # a list of threshold names to be used in plotting
 local_output = True  # Boolean indicating that local constants were stored in the out_folder

 # Skip the whole notebook if local_output is false in the preceding notebooks.
 if not local_output:
    print('No local constants saved. Skipping summary plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
-import copy
-import os
 import warnings
-from collections import OrderedDict
 from pathlib import Path

 warnings.filterwarnings('ignore')

-import glob
-
 import h5py
 import matplotlib
 import numpy as np
 import pasha as psh
 import yaml
 from IPython.display import Latex, Markdown, display

 matplotlib.use("agg")
 import matplotlib.gridspec as gridspec
 import matplotlib.pyplot as plt

 %matplotlib inline
 import extra_geom
 import tabulate
 from cal_tools.ana_tools import get_range
+from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_processed_modules
 from cal_tools.tools import CalibrationMetadata, module_index_to_qm
 from XFELDetAna.plotting.simpleplot import simplePlot
 ```

 %% Cell type:code id: tags:

 ``` python
+def bp_entry(bp):
+    """Return one table row for a bad pixel flag.
+
+    The row holds the flag name left-aligned and padded to 30 characters,
+    its value as a zero-padded 32-digit binary string, and its value as a
+    decimal integer string.
+    """
+    return [f"{bp.name:<30s}", f"{bp.value:032b}", f"{int(bp.value)}"]
+```
+
+%% Cell type:code id: tags:
+
+``` python
 if "AGIPD" in karabo_id:
    if "SPB" in karabo_id:
        dinstance = "AGIPD1M1"
        nmods = 16
    elif "MID" in karabo_id:
        dinstance = "AGIPD1M2"
        nmods = 16
    elif "HED" in karabo_id:
        dinstance = "AGIPD500K"
        nmods = 8
    # This list needs to be in that order as later Adaptive or fixed gain is
    # decided based on the condition for the Offset constant.
    expected_constants = ['Offset', 'Noise', 'ThresholdsDark', 'BadPixelsDark']
-    display(Markdown("""

-# Summary of AGIPD dark characterization #
+    table = []
+    badpixels = [
+        BadPixels.OFFSET_OUT_OF_THRESHOLD,
+        BadPixels.NOISE_OUT_OF_THRESHOLD,
+        BadPixels.OFFSET_NOISE_EVAL_ERROR,
+        BadPixels.GAIN_THRESHOLDING_ERROR,
+    ]
+    for bp in badpixels:
+        table.append(bp_entry(bp))

-The following report shows a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell.
+    display(Markdown("""
+# Summary of AGIPD dark characterization #

+The following report shows a set of dark images taken with the AGIPD detector to deduce detector offsets,
+noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell.

-**The offset** ($O$) is defined as the median ($M$) of the dark signal ($Ds$) over trains ($t$) for a given pixel ($x,y$) and memory cell ($c$).
+**The offset** ($O$) is defined as the median ($M$) of the dark signal ($Ds$) over trains ($t$) for a given pixel
+($x,y$) and memory cell ($c$).

 **The noise** $N$ is the standard deviation $\sigma$ of the dark signal.

 $$ O_{x,y,c} = M(Ds)_{t} ,\,\,\,\,\,\, N_{x,y,c} = \sigma(Ds)_{t}$$

-**The bad pixel** mask is encoded as a bit mask.
+**The bad pixel** mask is encoded as a bit mask."""))

+    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Name", "bit value", "integer value"])))
+    display(Markdown("""
 **"OFFSET_OUT_OF_THRESHOLD":**

 Offset outside of bounds:

 $$M(O)_{x,y} - \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} < O < M(O)_{x,y} + \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} $$

 or offset outside of hard limits

 $$ \mathrm{thresholds\_offset\_hard}_\mathrm{low} < O < \mathrm{thresholds\_offset\_hard}_\mathrm{high} $$

 **"NOISE_OUT_OF_THRESHOLD":**

 Noise outside of bounds:

 $$M(N)_{x,y} - \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} < N < M(N)_{x,y} + \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} $$

 or noise outside of hard limits

 $$\mathrm{thresholds\_noise\_hard}_\mathrm{low} < N < \mathrm{thresholds\_noise\_hard}_\mathrm{high} $$

 **"OFFSET_NOISE_EVAL_ERROR":**

 Offset and Noise both not $nan$ values

 Values: $\mathrm{thresholds\_offset\_sigma}$, $\mathrm{thresholds\_offset\_hard}$, $\mathrm{thresholds\_noise\_sigma}$, $\mathrm{thresholds\_noise\_hard}$ are given as parameters.

-"**\"GAIN_THRESHOLDING_ERROR\":**
+**"GAIN_THRESHOLDING_ERROR":**

 Bad gain separated pixels with sigma separation less than gain_separation_sigma_threshold

 $$ sigma\_separation = \\frac{\mathrm{gain\_offset} - \mathrm{previous\_gain\_offset}}{\sqrt{\mathrm{gain\_offset_{std}}^\mathrm{2} + \mathrm{previous\_gain\_offset_{std}}^\mathrm{2}}}$$
 $$ Bad\_separation = sigma\_separation < \mathrm{gain\_separation\_sigma\_threshold} $$

 """))

+
 elif "LPD" in karabo_id:
    dinstance = "LPD1M1"
    nmods = 16
    expected_constants = ['Offset', 'Noise', 'BadPixelsDark']
+    table = []
+    badpixels = [
+        BadPixels.OFFSET_OUT_OF_THRESHOLD,
+        BadPixels.NOISE_OUT_OF_THRESHOLD,
+        BadPixels.OFFSET_NOISE_EVAL_ERROR,
+    ]
+    for bp in badpixels:
+        table.append(bp_entry(bp))
    display(Markdown("""

 # Summary of LPD dark characterization #

 The following report shows a set of dark images taken with the LPD detector to deduce detector offsets, noise, bad-pixel maps. All three types of constants are evaluated per-pixel and per-memory cell.

 **The offset** ($O$) is defined as the median ($M$) of the dark signal ($Ds$) over trains ($t$) for a given pixel ($x,y$) and memory cell ($c$).

 **The noise** $N$ is the standard deviation $\sigma$ of the dark signal.

 $$ O_{x,y,c} = M(Ds)_{t} ,\,\,\,\,\,\, N_{x,y,c} = \sigma(Ds)_{t}$$

-**The bad pixel** mask is encoded as a bit mask.
+**The bad pixel** mask is encoded as a bit mask."""))
+    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Name", "bit value", "integer value"])))
+    display(Markdown("""

 **"OFFSET_OUT_OF_THRESHOLD":**

 Offset outside of bounds:

 $$M(O)_{x,y} - \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} < O < M(O)_{x,y} + \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} $$

 or offset outside of hard limits

 $$ \mathrm{thresholds\_offset\_hard}_\mathrm{low} < O < \mathrm{thresholds\_offset\_hard}_\mathrm{high} $$

 **"NOISE_OUT_OF_THRESHOLD":**

 Noise outside of bounds:

 $$M(N)_{x,y} - \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} < N < M(N)_{x,y} + \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} $$

 or noise outside of hard limits

 $$\mathrm{thresholds\_noise\_hard}_\mathrm{low} < N < \mathrm{thresholds\_noise\_hard}_\mathrm{high} $$

 **"OFFSET_NOISE_EVAL_ERROR":**

 Offset and Noise both not $nan$ values

 Values: $\mathrm{thresholds\_offset\_sigma}$, $\mathrm{thresholds\_offset\_hard}$, $\mathrm{thresholds\_noise\_sigma}$, $\mathrm{thresholds\_noise\_hard}$ are given as parameters.
 """))
 elif "DSSC" in karabo_id:
    dinstance = "DSSC1M1"
    nmods = 16
    expected_constants = ['Offset', 'Noise']
    display(Markdown("""

 # Summary of DSSC dark characterization #

    """))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Merge the per-module metadata files found in out_folder into the calibration metadata.
 out_folder = Path(out_folder)
 # Metadata is read from metadata_folder when given, otherwise from out_folder.
 metadata = CalibrationMetadata(metadata_folder or out_folder)
 mod_mapping = metadata.setdefault("modules-mapping", {})
 # Per module: the "old-constants" entry of its metadata file, used later to locate
 # the previous constants.
 old_constant_metadata = {}
 for fn in out_folder.glob("module_metadata_*.yml"):
    with fn.open("r") as fd:
        fdict = yaml.safe_load(fd)
    module = fdict["module"]
    mod_mapping[module] = fdict["pdu"]
    old_constant_metadata[module] = fdict["old-constants"]

 # Persist the updated modules mapping.
 metadata.save()
 ```

 %% Cell type:code id: tags:

 ``` python
 # In AGIPD fixed gain mode, ThresholdsDark is not expected
 if 'AGIPD' in karabo_id:
    # Inspect the first module that has both a mapping entry and a local Offset file.
    for i in range(nmods):
        qm = module_index_to_qm(i)
        if not mod_mapping.get(qm):
            continue
        mod_pdu = mod_mapping[qm]
        fpath = out_folder / f"const_Offset_{mod_pdu}.h5"
        if not fpath.exists():
            continue

        with h5py.File(fpath, 'r') as f:
            # A truthy "Gain mode" condition value drops ThresholdsDark from the expected constants.
            if 'Gain mode' in f['condition']:
                if f["condition"]["Gain mode"]["value"][()]:
                    expected_constants.remove("ThresholdsDark")
        # One Offset file is enough to decide; stop after the first one found.
        break
 ```

 %% Cell type:markdown id: tags:

 Prepare the newly produced constants from the local files in `out_folder`, together with the previously injected constants they are compared against.

 %% Cell type:code id: tags:

 ``` python
 # Get shape, dtype, and number of files for each constant.
 # Also build lists of the files involved, to be loaded in parallel in a later cell.
 const_shape_and_dtype = {}
 found_module_nums = set()
 pieces_to_load = []
 pieces_to_load_prev = []

 for cname in expected_constants:
    # First module file of this constant that is known to exist; used to read shape and dtype.
    shape_fpath = None

    for i in range(nmods):
        qm = module_index_to_qm(i)
        if not mod_mapping.get(qm):
            continue
        mod_pdu = mod_mapping[qm]
        fpath = out_folder / f"const_{cname}_{mod_pdu}.h5"
        if not fpath.exists():
            continue

        pieces_to_load.append((cname, i, fpath))
        found_module_nums.add(i)
        if shape_fpath is None:
            shape_fpath = fpath

        # try finding old constants using paths from CalCat store
        if qm not in old_constant_metadata:
            continue
        qm_mdata = old_constant_metadata[qm]

        if cname not in qm_mdata:
            continue

        fpath_prev = qm_mdata[cname]["filepath"]
        h5path_prev = qm_mdata[cname]["h5path"]

        if fpath_prev and h5path_prev:
            pieces_to_load_prev.append((cname, i, fpath_prev, h5path_prev))

    # Without any local file there is nothing to allocate or plot for this constant.
    # (Previously the last candidate path of the loop was opened here, which fails
    # whenever that particular module file is missing.)
    if shape_fpath is None:
        print(f"No local files found for constant {cname}, skipping it.")
        continue

    # Get the constant shape from one of the module files
    with h5py.File(shape_fpath, 'r') as f:
        const_shape_and_dtype[cname] = (f['data'].shape, f['data'].dtype)

 # Allocate arrays for these constants (without space for missing modules)
 nmods_found = len(found_module_nums)
 constants = {
    cname: psh.alloc((nmods_found,) + module_const_shape, dtype=dt, fill=0)
    for cname, (module_const_shape, dt) in const_shape_and_dtype.items()
 }
 prev_const = {
    cname: psh.alloc((nmods_found,) + module_const_shape, dtype=dt, fill=0)
    for cname, (module_const_shape, dt) in const_shape_and_dtype.items()
 }
 ```

 %% Cell type:code id: tags:

 ``` python
 # Read all constant files in parallel into the pre-allocated arrays.
 found_module_nums = sorted(found_module_nums)
 mod_names = list(map(module_index_to_qm, found_module_nums))

 def load_piece(wid, ix, entry):
    """Read one newly produced constant file into its slot of `constants`."""
    const_name, module_no, file_path = entry
    # Arrays only hold the modules that were found, so map module number to position.
    slot = constants[const_name][found_module_nums.index(module_no)]

    with h5py.File(file_path, 'r') as h5file:
        h5file['data'].read_direct(slot)

 psh.map(load_piece, pieces_to_load)
 print(f"Loaded constant data from {len(pieces_to_load)} files")

 def load_piece_prev(wid, ix, entry):
    """Read one previous constant from its file and HDF5 path into `prev_const`."""
    const_name, module_no, file_path, group_path = entry
    slot = prev_const[const_name][found_module_nums.index(module_no)]

    with h5py.File(file_path, 'r') as h5file:
        h5file[group_path]['data'].read_direct(slot)

 psh.map(load_piece_prev, pieces_to_load_prev)
 print(f"Loaded previous constant data from {len(pieces_to_load_prev)} files")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Section header followed by the overview of the modules for which constants were loaded.
 display(Markdown('## Processed modules'))
 show_processed_modules(dinstance, constants, mod_names, mode="processed")
 ```

 %% Cell type:markdown id: tags:

 ## Summary figures across Modules ##

 The following plots give an overview of calibration constants averaged across pixels and memory cells. A bad pixel mask is applied.

 %% Cell type:code id: tags:

 ``` python
 # Select the detector geometry and the per-module array shape used by the overview plots.
 # NOTE(review): there is no fallback branch; `geom` and `module_shape` stay undefined
 # for any other detector instance.
 if "LPD" in dinstance:
    geom = extra_geom.LPD_1MGeometry.from_quad_positions(quad_pos=[(11.4, 299),
                                                                   (-11.5, 8),
                                                                   (254.5, -16),
                                                                   (278.5, 275)])
    module_shape = (256, 256)

 elif dinstance in ('AGIPD1M1', 'AGIPD1M2'):
    geom = extra_geom.AGIPD_1MGeometry.from_quad_positions(quad_pos=[(-525, 625),
                                                                     (-550, -10),
                                                                     (520, -160),
                                                                     (542.5, 475)])
    module_shape = (512, 128)

 elif dinstance == "AGIPD500K":
    geom = extra_geom.AGIPD_500K2GGeometry.from_origin()
    module_shape = (512, 128)

 elif "DSSC" in dinstance:
    module_shape = (128, 512)
    quadpos = [(-130, 5), (-130, -125), (5, -125), (5, 5)]
    geom = extra_geom.DSSC_1MGeometry.from_quad_positions(quadpos)
 ```

 %% Cell type:code id: tags:

 ``` python
 def plot_const_and_delta(const, delta, const_name, gain_name):
    """Plot a constant over all modules together with its change to the previous one.

    The top row shows `const` on the detector geometry. If `delta` holds nothing
    but zeros and NaNs, a note is written instead of the bottom row; otherwise the
    bottom row shows `delta` and `delta` relative to `const` in percent, both on a
    colour scale centred around zero.
    """
    layout = gridspec.GridSpec(2, 2)
    fig = plt.figure(figsize=(24, 32))

    def draw_centred(values, cell, unit, title):
        # Diverging colour map with limits symmetric around zero.
        axis = fig.add_subplot(cell)
        low, high = get_range(values, 2)
        limit = max(high, abs(low))
        geom.plot_data_fast(
            values, vmin=-limit, vmax=limit, ax=axis, cmap="RdBu",
            colorbar={'shrink': 0.6, 'pad': 0.1, 'label': unit})
        axis.set_title(title, fontsize=15)

    # Top row: the constant itself.
    top_axis = fig.add_subplot(layout[0, :])
    low, high = get_range(const, 2)
    geom.plot_data_fast(
        const, vmin=low, vmax=high, ax=top_axis,
        colorbar={'shrink': 0.9, 'pad': 0.05, 'label': 'ADUs'})
    top_axis.set_title(f"{const_name} - {gain_name}", fontsize=15)

    # NaN counts as non-zero, so equality means every non-zero entry is NaN.
    num_nonzero = np.count_nonzero(delta)
    num_nan = np.count_nonzero(np.isnan(delta))
    if num_nonzero == num_nan:
        fig.text(0.5, 0.4, "No difference from previous constant",
                 ha='center', va='center', fontsize=15)
        return

    # Bottom row: absolute and relative difference to the previous constant.
    draw_centred(delta, layout[1, 0], 'ADUs',
                 f"Difference with previous {const_name} - {gain_name}")
    draw_centred(delta / const * 100, layout[1, 1], '%', "Percentage difference")
 ```

 %% Cell type:code id: tags:

 ``` python
 # pasha process context shared by the following cells for allocation and mapping.
 # NOTE(review): presumably `nmods` sets the number of worker processes - confirm.
 psh_ctx = psh.ProcessContext(nmods)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Plot every constant (except BadPixelsDark) over all modules, once per gain stage,
 # together with the difference to the previous constant.
 # NOTE: `gainstages` keeps the value of the last plotted constant and is read again
 # by later cells.
 gainstages = 1

 for const_name, const in constants.items():
    if const_name == 'BadPixelsDark':
        continue
    # Check if constant gain available in constant e.g. AGIPD, LPD
    if len(const.shape) == 5:
        gainstages = 3
    else:
        gainstages = 1

    display(Markdown(f'##### {const_name}'))
    # NOTE(review): print_once is not used further in this cell.
    print_once = True
    for gain in range(gainstages):
        if const_name == 'ThresholdsDark':
            # Only the first two entries are thresholds with a name.
            if gain > 1:
                continue
            glabel = threshold_names[gain]
        else:
            glabel = gain_names[gain]

        # Shared arrays with one slot per module position, filled by the workers below.
        stacked_const = psh_ctx.alloc((nmods,) + module_shape, dtype=np.float64, fill=0)
        stacked_delta = psh_ctx.alloc((nmods,) + module_shape, dtype=np.float64, fill=0)

        def average_module(wid, i, _):
            # Average one module over axis 2 of its per-module array
            # (presumably the memory cell axis - confirm) and store it at position i.
            qm = module_index_to_qm(i)
            if qm in mod_names:
                m_idx = mod_names.index(qm)
                # Check if constant shape of 5 indices e.g. AGIPD, LPD
                if const.ndim == 5:
                    values = np.nanmean(const[m_idx, :, :, :, gain], axis=2)
                    prev_val = np.nanmean(prev_const[const_name][m_idx, :, :, :, gain], axis=2)
                else:
                    values = np.nanmean(const[m_idx, :, :, :], axis=2)
                    prev_val = np.nanmean(prev_const[const_name][m_idx, :, :, :], axis=2)
                # Exact zeros are replaced by NaN before plotting.
                values[values == 0] = np.nan
                prev_val[prev_val == 0] = np.nan
                stacked_const[i] = np.moveaxis(values, 0, -1)
                stacked_delta[i] = np.moveaxis(values - prev_val, 0, -1)
            else:
                # if module not available fill space with nan
                # (stacked_delta keeps its fill value of 0 for such modules)
                stacked_const[i] = np.nan

        psh_ctx.map(average_module, range(nmods))

        # Plotting constant overall modules.
        display(Markdown(f'###### {glabel} ######'))

        plot_const_and_delta(stacked_const, stacked_delta, const_name, glabel)

        plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Loop over modules and constants
 # For every constant and gain stage, plot mean and standard deviation over the
 # first two axes of each module's data against the memory cell ID.
 for const_name, const in constants.items():
    if const_name == 'BadPixelsDark':
        continue  # Displayed separately below

    display(Markdown(f'### Summary across Modules - {const_name}'))

    for gain in range(gainstages):
        if const_name == 'ThresholdsDark':
            if gain == 2:
                continue
            glabel = threshold_names[gain]
        else:
            glabel = gain_names[gain]

        if const.ndim == 5:
            data = const[:, :, :, :, gain]
        else:
            data = const

        # Bad pixels are per gain stage, and you need thresholds to pick the gain
        # stage, so we don't mask the thresholds.
        if ('BadPixelsDark' in constants) and (const_name != 'ThresholdsDark'):
            pixel_selection = 'good pixels only'
            if const.ndim == 5:
                goodpix = constants['BadPixelsDark'][:, :, :, :, gain] == 0
            else:
                goodpix = constants['BadPixelsDark'] == 0
        else:
            pixel_selection = 'good and bad pixels'
            goodpix = [True] * data.shape[0]
        label = f'{const_name} value [ADU], {pixel_selection}'

        # Reduce data in parallel (one worker per module):
        datamean = psh_ctx.alloc(data.shape[:1] + data.shape[3:], data.dtype)
        datastd = psh_ctx.alloc(data.shape[:1] + data.shape[3:], data.dtype)

        def average_mem_cells(wid, i, _):
            datamean[i] = np.mean(data[i], axis=(0, 1), where=goodpix[i])
            datastd[i] = np.std(data[i], axis=(0, 1), where=goodpix[i])
        psh_ctx.map(average_mem_cells, range(data.shape[0]))

        fig = plt.figure(figsize=(15, 6), tight_layout={
                         'pad': 0.2, 'w_pad': 1.3, 'h_pad': 1.3})
        ax = fig.add_subplot(121)

        d = []
        for im, mod in enumerate(datamean):
            d.append({'x': np.arange(mod.shape[0]),
                      'y': mod,
                      'drawstyle': 'steps-pre',
                      'label': mod_names[im],
                      })

        _ = simplePlot(d, figsize=(10, 10), xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label=label,
                            use_axis=ax,
                            title=glabel,
                            title_position=[0.5, 1.18],
                            legend='outside-top-ncol6-frame', legend_size='18%',
                            legend_pad=0.00)

        # Plot standard deviation
        ax = fig.add_subplot(122)
        # Use the same pixel selection text as for the mean, so the label matches the
        # mask that was actually applied (thresholds are never masked).
        # Raw string: '\s' is not a valid escape sequence in a normal string literal.
        label = rf'$\sigma$ {const_name} [ADU], {pixel_selection}'
        d = []
        for im, mod in enumerate(datastd):
            d.append({'x': np.arange(mod.shape[0]),
                      'y': mod,
                      'drawstyle': 'steps-pre',
                      'label': mod_names[im],
                      })

        _ = simplePlot(d, figsize=(10, 10), xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label=label,
                            use_axis=ax,
                            title=rf'{glabel} $\sigma$',
                            title_position=[0.5, 1.18],
                            legend='outside-top-ncol6-frame', legend_size='18%',
                            legend_pad=0.00)

        plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Plot the fraction of bad pixels per memory cell for every module and gain stage.
 if 'BadPixelsDark' in constants:
    display(Markdown(f'### Summary across Modules - BadPixelsDark'))

    bad_px_dark = constants['BadPixelsDark']

    # `gainstages` is the value left over from the earlier plotting cell.
    for gain in range(gainstages):
        glabel = gain_names[gain]

        if bad_px_dark.ndim == 5:
            data = bad_px_dark[:, :, :, :, gain]
        else:
            data = bad_px_dark[:, :, :, :]

        # Count non-zero mask values over the two axes following the module axis
        # and normalise by the size of those axes.
        bad_px_per_cell = np.count_nonzero(data, axis=(1, 2))
        fraction_bad_per_cell = bad_px_per_cell / (data.shape[1] * data.shape[2])
        # Entries where every pixel is flagged are replaced by NaN.
        fraction_bad_per_cell[fraction_bad_per_cell == 1.0] = np.nan

        fig = plt.figure(figsize=(15, 6), tight_layout={
                         'pad': 0.2, 'w_pad': 1.3, 'h_pad': 1.3})
        ax = fig.add_subplot(1, 1, 1)

        # One step line per module.
        d = []
        for im, mod in enumerate(fraction_bad_per_cell):
            d.append({'x': np.arange(mod.shape[0]),
                      'y': mod,
                      'drawstyle': 'steps-pre',
                      'label': mod_names[im],
                      })

        _ = simplePlot(d, figsize=(10, 10), xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label='Fraction of bad pixels',
                            use_axis=ax,
                            title=glabel,
                            title_position=[0.5, 1.18],
                            legend='outside-top-ncol6-frame', legend_size='18%',
                            legend_pad=0.00)

        plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables across Modules ##

 Tables show values averaged across all pixels and memory cells of a given detector module.

 %% Cell type:code id: tags:

 ``` python
 # Stop tabulate from escaping `$` and `\` in LaTeX tables, if such rules are present.
 tabulate.LATEX_ESCAPE_RULES.pop('$', None)
 tabulate.LATEX_ESCAPE_RULES.pop('\\', None)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Build one LaTeX table per constant (except BadPixelsDark) holding
 # "mean ± std" for every module and gain stage / threshold.
 head = ['Module', 'High gain', 'Medium gain', 'Low gain']
 head_th = ['Module', 'HG_MG threshold', 'MG_LG threshold']
 for const_name, const in constants.items():
    if const_name == 'BadPixelsDark':
        continue  # Handled below

    if const.ndim == 4:  # Add gain dimension if not present
        const = const[:, :, : , :, np.newaxis]

    # Thresholds are never masked; other constants are restricted to entries
    # whose BadPixelsDark value is 0 when that constant is available.
    if ('BadPixelsDark' in constants) and (const_name != 'ThresholdsDark'):
        goodpix = constants['BadPixelsDark'] == 0
        if goodpix.ndim == 4:
            goodpix = goodpix[:, :, : , :, np.newaxis]

        label = f'### Average {const_name} [ADU], good pixels only'
    else:
        # `where=True` for every module, i.e. no masking.
        goodpix = [True] * const.shape[0]
        label = f'### Average {const_name} [ADU], good and bad pixels'

    # Reduce data in parallel (one worker per module)
    mean_by_module_gain = psh.alloc(const.shape[:1] + const.shape[4:], const.dtype)
    std_by_module_gain  = psh.alloc(const.shape[:1] + const.shape[4:], const.dtype)

    def average_module(wid, i, _):
        # Reduce the first three per-module axes, keeping the last (gain) axis.
        mean_by_module_gain[i] = np.mean(const[i], axis=(0, 1, 2), where=goodpix[i])
        std_by_module_gain[i]  = np.std (const[i], axis=(0, 1, 2), where=goodpix[i])
    psh_ctx.map(average_module, range(const.shape[0]))

    table = []

    for i_mod, mod in enumerate(mod_names):
        t_line = [mod]
        for gain in range(gainstages):
            # Only the first two entries of ThresholdsDark are tabulated
            # (the header has two threshold columns).
            if const_name == 'ThresholdsDark' and gain == 2:
                continue

            datamean = mean_by_module_gain[i_mod, gain]
            datastd = std_by_module_gain[i_mod, gain]
            t_line.append(f'{datamean:6.1f} $\\pm$ {datastd:6.1f}')

        table.append(t_line)

    display(Markdown(label))
    header = head_th if const_name == 'ThresholdsDark' else head
    md = display(Latex(tabulate.tabulate(
        table, tablefmt='latex', headers=header)))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Bad pixels summary table: per module and gain stage, the number of entries
 # with a non-zero BadPixelsDark value and their fraction of all entries.
 if 'BadPixelsDark' in constants:
    bad_px_dark = constants['BadPixelsDark']

    # Defined up front so the heading never depends on the loops below having
    # run at least once (it would otherwise be undefined, or silently reuse a
    # `label` left over from an earlier cell).
    label = '## Number(fraction) of bad pixels'

    table = []

    for i_mod, mod in enumerate(mod_names):

        t_line = [mod]
        for gain in range(gainstages):
            # A fifth axis, when present, is indexed by gain stage.
            if bad_px_dark.ndim == 5:
                data = bad_px_dark[i_mod, :, :, :, gain]
            else:
                data = bad_px_dark[i_mod]

            datasum = np.count_nonzero(data)
            datamean = datasum / data.size

            t_line.append(f'{datasum:6.0f} ({datamean:6.3f}) ')

        table.append(t_line)

    display(Markdown(label))
    md = display(Latex(tabulate.tabulate(
        table, tablefmt='latex', headers=head)))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Author: European XFEL Detector Group, Version: 1.0

 #  Summary of the processed dark calibration constants and a comparison with the previously injected constants.

 out_folder = "/gpfs/exfel/data/scratch/kluyvert/lpd-dark-p900320-r26_27_28" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 karabo_id = "FXE_DET_LPD1M-1" # detector instance
 gain_names = ['High gain', 'Medium gain', 'Low gain'] # a list of gain names to be used in plotting
 threshold_names = ['HG-MG threshold', 'MG_LG threshold'] # a list of threshold names to be used in plotting
 local_output = True  # Boolean indicating that local constants were stored in the out_folder

 # Skip the whole notebook if local_output is false in the preceding notebooks.
 if not local_output:
    print('No local constants saved. Skipping summary plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
-import copy
-import os
 import warnings
-from collections import OrderedDict
 from pathlib import Path

 warnings.filterwarnings('ignore')

-import glob
-
 import h5py
 import matplotlib
 import numpy as np
 import pasha as psh
 import yaml
 from IPython.display import Latex, Markdown, display

 matplotlib.use("agg")
 import matplotlib.gridspec as gridspec
 import matplotlib.pyplot as plt

 %matplotlib inline
 import extra_geom
 import tabulate
 from cal_tools.ana_tools import get_range
+from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_processed_modules
 from cal_tools.tools import CalibrationMetadata, module_index_to_qm
 from XFELDetAna.plotting.simpleplot import simplePlot
 ```

 %% Cell type:code id: tags:

 ``` python
+def bp_entry(bp):
+    """Build one table row describing a bad pixel flag.
+
+    Returns a list of three strings: the flag's name left-aligned and padded
+    to 30 characters, its value as a 32-digit zero-padded binary number, and
+    its value as a decimal integer.
+    """
+    return [f"{bp.name:<30s}", f"{bp.value:032b}", f"{int(bp.value)}"]
+```
+
+%% Cell type:code id: tags:
+
+``` python
 if "AGIPD" in karabo_id:
    if "SPB" in karabo_id:
        dinstance = "AGIPD1M1"
        nmods = 16
    elif "MID" in karabo_id:
        dinstance = "AGIPD1M2"
        nmods = 16
    elif "HED" in karabo_id:
        dinstance = "AGIPD500K"
        nmods = 8
    # This list needs to be in that order as later Adaptive or fixed gain is
    # decided based on the condition for the Offset constant.
    expected_constants = ['Offset', 'Noise', 'ThresholdsDark', 'BadPixelsDark']
-    display(Markdown("""

-# Summary of AGIPD dark characterization #
+    table = []
+    badpixels = [
+        BadPixels.OFFSET_OUT_OF_THRESHOLD,
+        BadPixels.NOISE_OUT_OF_THRESHOLD,
+        BadPixels.OFFSET_NOISE_EVAL_ERROR,
+        BadPixels.GAIN_THRESHOLDING_ERROR,
+    ]
+    for bp in badpixels:
+        table.append(bp_entry(bp))

-The following report shows a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell.
+    display(Markdown("""
+# Summary of AGIPD dark characterization #

+The following report shows a set of dark images taken with the AGIPD detector to deduce detector offsets,
+noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell.

-**The offset** ($O$) is defined as the median ($M$) of the dark signal ($Ds$) over trains ($t$) for a given pixel ($x,y$) and memory cell ($c$).
+**The offset** ($O$) is defined as the median ($M$) of the dark signal ($Ds$) over trains ($t$) for a given pixel
+($x,y$) and memory cell ($c$).

 **The noise** $N$ is the standard deviation $\sigma$ of the dark signal.

 $$ O_{x,y,c} = M(Ds)_{t} ,\,\,\,\,\,\, N_{x,y,c} = \sigma(Ds)_{t}$$

-**The bad pixel** mask is encoded as a bit mask.
+**The bad pixel** mask is encoded as a bit mask."""))

+    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Name", "bit value", "integer value"])))
+    display(Markdown("""
 **"OFFSET_OUT_OF_THRESHOLD":**

 Offset outside of bounds:

 $$M(O)_{x,y} - \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} < O < M(O)_{x,y} + \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} $$

 or offset outside of hard limits

 $$ \mathrm{thresholds\_offset\_hard}_\mathrm{low} < O < \mathrm{thresholds\_offset\_hard}_\mathrm{high} $$

 **"NOISE_OUT_OF_THRESHOLD":**

 Noise outside of bounds:

 $$M(N)_{x,y} - \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} < N < M(N)_{x,y} + \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} $$

 or noise outside of hard limits

 $$\mathrm{thresholds\_noise\_hard}_\mathrm{low} < N < \mathrm{thresholds\_noise\_hard}_\mathrm{high} $$

 **"OFFSET_NOISE_EVAL_ERROR":**

 Offset and Noise both not $nan$ values

 Values: $\mathrm{thresholds\_offset\_sigma}$, $\mathrm{thresholds\_offset\_hard}$, $\mathrm{thresholds\_noise\_sigma}$, $\mathrm{thresholds\_noise\_hard}$ are given as parameters.

-"**\"GAIN_THRESHOLDING_ERROR\":**
+**"GAIN_THRESHOLDING_ERROR":**

 Bad gain separated pixels with sigma separation less than gain_separation_sigma_threshold

 $$ sigma\_separation = \\frac{\mathrm{gain\_offset} - \mathrm{previous\_gain\_offset}}{\sqrt{\mathrm{gain\_offset_{std}}^\mathrm{2} + \mathrm{previous\_gain\_offset_{std}}^\mathrm{2}}}$$
 $$ Bad\_separation = sigma\_separation < \mathrm{gain\_separation\_sigma\_threshold} $$

 """))

+
 elif "LPD" in karabo_id:
    dinstance = "LPD1M1"
    nmods = 16
    expected_constants = ['Offset', 'Noise', 'BadPixelsDark']
+    table = []
+    badpixels = [
+        BadPixels.OFFSET_OUT_OF_THRESHOLD,
+        BadPixels.NOISE_OUT_OF_THRESHOLD,
+        BadPixels.OFFSET_NOISE_EVAL_ERROR,
+    ]
+    for bp in badpixels:
+        table.append(bp_entry(bp))
    display(Markdown("""

 # Summary of LPD dark characterization #

 The following report shows a set of dark images taken with the LPD detector to deduce detector offsets, noise, bad-pixel maps. All three types of constants are evaluated per-pixel and per-memory cell.

 **The offset** ($O$) is defined as the median ($M$) of the dark signal ($Ds$) over trains ($t$) for a given pixel ($x,y$) and memory cell ($c$).

 **The noise** $N$ is the standard deviation $\sigma$ of the dark signal.

 $$ O_{x,y,c} = M(Ds)_{t} ,\,\,\,\,\,\, N_{x,y,c} = \sigma(Ds)_{t}$$

-**The bad pixel** mask is encoded as a bit mask.
+**The bad pixel** mask is encoded as a bit mask."""))
+    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Name", "bit value", "integer value"])))
+    display(Markdown("""

 **"OFFSET_OUT_OF_THRESHOLD":**

 Offset outside of bounds:

 $$M(O)_{x,y} - \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} < O < M(O)_{x,y} + \sigma(O)_{x,y} * \mathrm{thresholds\_offset\_sigma} $$

 or offset outside of hard limits

 $$ \mathrm{thresholds\_offset\_hard}_\mathrm{low} < O < \mathrm{thresholds\_offset\_hard}_\mathrm{high} $$

 **"NOISE_OUT_OF_THRESHOLD":**

 Noise outside of bounds:

 $$M(N)_{x,y} - \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} < N < M(N)_{x,y} + \sigma(N)_{x,y} * \mathrm{thresholds\_noise\_sigma} $$

 or noise outside of hard limits

 $$\mathrm{thresholds\_noise\_hard}_\mathrm{low} < N < \mathrm{thresholds\_noise\_hard}_\mathrm{high} $$

 **"OFFSET_NOISE_EVAL_ERROR":**

 Offset and Noise both not $nan$ values

 Values: $\mathrm{thresholds\_offset\_sigma}$, $\mathrm{thresholds\_offset\_hard}$, $\mathrm{thresholds\_noise\_sigma}$, $\mathrm{thresholds\_noise\_hard}$ are given as parameters.
 """))
 elif "DSSC" in karabo_id:
    dinstance = "DSSC1M1"
    nmods = 16
    expected_constants = ['Offset', 'Noise']
    display(Markdown("""

 # Summary of DSSC dark characterization #

    """))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Merge the per-module metadata files found in out_folder: record the
 # module -> PDU mapping in the calibration metadata and keep each module's
 # "old-constants" entry for the comparison with previous constants.
 out_folder = Path(out_folder)
 metadata = CalibrationMetadata(metadata_folder or out_folder)
 mod_mapping = metadata.setdefault("modules-mapping", {})
 old_constant_metadata = {}
 for fn in out_folder.glob("module_metadata_*.yml"):
    with fn.open("r") as fd:
        fdict = yaml.safe_load(fd)

    module, pdu, old_consts = fdict["module"], fdict["pdu"], fdict["old-constants"]
    mod_mapping[module] = pdu
    old_constant_metadata[module] = old_consts

 metadata.save()
 ```

 %% Cell type:code id: tags:

 ``` python
 # In AGIPD fixed gain mode, ThresholdsDark is not expected.
 # The decision is taken from the first module that has an Offset file.
 if 'AGIPD' in karabo_id:
    for i in range(nmods):
        qm = module_index_to_qm(i)
        if not mod_mapping.get(qm):
            continue
        mod_pdu = mod_mapping[qm]
        fpath = out_folder / f"const_Offset_{mod_pdu}.h5"
        if not fpath.exists():
            continue

        with h5py.File(fpath, 'r') as f:
            condition = f['condition']
            # A truthy "Gain mode" condition value drops the thresholds.
            if 'Gain mode' in condition and condition["Gain mode"]["value"][()]:
                expected_constants.remove("ThresholdsDark")
        break
 ```

 %% Cell type:markdown id: tags:

 Prepare the newly injected constants and the previous constants from the local files produced in `out_folder`.

 %% Cell type:code id: tags:

 ``` python
 # Get shape, dtype, and number of files for each constant.
 # Also build lists of the files involved, to be loaded in parallel in a later cell.
 const_shape_and_dtype = {}
 found_module_nums = set()
 pieces_to_load = []
 pieces_to_load_prev = []

 for cname in expected_constants:
    # Path of a file that is known to exist for this constant. `fpath` cannot
    # be used for this after the loop: it may name a missing file (last module
    # not processed) or be left over from the previous constant.
    found_fpath = None

    for i in range(nmods):
        qm = module_index_to_qm(i)
        if not mod_mapping.get(qm):
            continue
        mod_pdu = mod_mapping[qm]
        fpath = out_folder / f"const_{cname}_{mod_pdu}.h5"
        if not fpath.exists():
            continue

        found_fpath = fpath
        pieces_to_load.append((cname, i, fpath))
        found_module_nums.add(i)

        # try finding old constants using paths from CalCat store
        if qm not in old_constant_metadata:
            continue
        qm_mdata = old_constant_metadata[qm]

        if cname not in qm_mdata:
            continue

        fpath_prev = qm_mdata[cname]["filepath"]
        h5path_prev = qm_mdata[cname]["h5path"]

        if fpath_prev and h5path_prev:
            pieces_to_load_prev.append((cname, i, fpath_prev, h5path_prev))

    if found_fpath is None:
        # Nothing to allocate or plot for this constant.
        print(f"No files found for constant {cname}, skipping it")
        continue

    # Get the constant shape from one of the module files
    with h5py.File(found_fpath, 'r') as f:
        const_shape_and_dtype[cname] = (f['data'].shape, f['data'].dtype)

 # Allocate arrays for these constants (without space for missing modules)
 nmods_found = len(found_module_nums)
 constants = {
    cname: psh.alloc((nmods_found,) + module_const_shape, dtype=dt, fill=0)
    for cname, (module_const_shape, dt) in const_shape_and_dtype.items()
 }
 prev_const = {
    cname: psh.alloc((nmods_found,) + module_const_shape, dtype=dt, fill=0)
    for cname, (module_const_shape, dt) in const_shape_and_dtype.items()
 }
 ```

 %% Cell type:code id: tags:

 ``` python
 # Load the constant data in parallel
 found_module_nums = sorted(found_module_nums)
 mod_names = list(map(module_index_to_qm, found_module_nums))

 def load_piece(wid, ix, entry):
    """Read one new constant file into its module slot of `constants`."""
    cname, mod_no, fpath = entry
    # Slot of this module among the modules that were actually found.
    target = constants[cname][found_module_nums.index(mod_no)]

    with h5py.File(fpath, 'r') as f:
        f['data'].read_direct(target)

 psh.map(load_piece, pieces_to_load)
 print(f"Loaded constant data from {len(pieces_to_load)} files")

 def load_piece_prev(wid, ix, entry):
    """Read one previous constant into its module slot of `prev_const`."""
    cname, mod_no, fpath, h5path = entry
    target = prev_const[cname][found_module_nums.index(mod_no)]

    with h5py.File(fpath, 'r') as f:
        f[h5path]['data'].read_direct(target)

 psh.map(load_piece_prev, pieces_to_load_prev)
 print(f"Loaded previous constant data from {len(pieces_to_load_prev)} files")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Heading plus an overview of the modules in `mod_names`; the rendering is
 # delegated to cal_tools.plotting.show_processed_modules.
 display(Markdown('## Processed modules'))
 show_processed_modules(dinstance, constants, mod_names, mode="processed")
 ```

 %% Cell type:markdown id: tags:

 ## Summary figures across Modules ##

 The following plots give an overview of calibration constants averaged across pixels and memory cells. A bad pixel mask is applied.

 %% Cell type:code id: tags:

 ``` python
 # Choose the extra_geom geometry for the detector and the per-module array
 # shape used when stacking module images for plotting.
 if "LPD" in dinstance:
    geom = extra_geom.LPD_1MGeometry.from_quad_positions(
        quad_pos=[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)],
    )
    module_shape = (256, 256)

 elif dinstance in ('AGIPD1M1', 'AGIPD1M2'):
    geom = extra_geom.AGIPD_1MGeometry.from_quad_positions(
        quad_pos=[(-525, 625), (-550, -10), (520, -160), (542.5, 475)],
    )
    module_shape = (512, 128)

 elif dinstance == "AGIPD500K":
    geom = extra_geom.AGIPD_500K2GGeometry.from_origin()
    module_shape = (512, 128)

 elif "DSSC" in dinstance:
    module_shape = (128, 512)
    quadpos = [(-130, 5), (-130, -125), (5, -125), (5, 5)]
    geom = extra_geom.DSSC_1MGeometry.from_quad_positions(quadpos)
 ```

 %% Cell type:code id: tags:

 ``` python
 def plot_const_and_delta(const, delta, const_name, gain_name):
    """Plot a constant and, if it changed, its difference to the previous one.

    The top panel shows `const`. When `delta` has entries that are neither
    zero nor NaN, two bottom panels show `delta` and `delta` relative to
    `const` in percent, each on a colour scale symmetric around zero.
    Otherwise a note is written into the figure and nothing else is drawn.
    """
    gs = gridspec.GridSpec(2, 2)
    fig = plt.figure(figsize=(24, 32))

    def draw_centered(values, cell, unit, title):
        # Diverging colour map with limits mirrored around zero.
        axis = fig.add_subplot(cell)
        low, high = get_range(values, 2)
        bound = max(high, abs(low))
        geom.plot_data_fast(
            values, vmin=-bound, vmax=bound, ax=axis, cmap="RdBu",
            colorbar={'shrink': 0.6, 'pad': 0.1, 'label': unit},
        )
        axis.set_title(title, fontsize=15)

    # Top panel: the constant itself.
    top_axis = fig.add_subplot(gs[0, :])
    low, high = get_range(const, 2)
    geom.plot_data_fast(
        const, vmin=low, vmax=high, ax=top_axis,
        colorbar={'shrink': 0.9, 'pad': 0.05, 'label': 'ADUs'},
    )
    top_axis.set_title(f"{const_name} - {gain_name}", fontsize=15)

    # NaN counts as non-zero, so equal counts mean every non-zero entry is NaN.
    n_changed = np.count_nonzero(delta) - np.count_nonzero(np.isnan(delta))
    if n_changed == 0:
        fig.text(0.5, 0.4, "No difference from previous constant",
                 ha='center', va='center', fontsize=15)
        return

    # Bottom left: absolute difference; bottom right: difference in percent.
    draw_centered(
        delta, gs[1, 0], 'ADUs',
        f"Difference with previous {const_name} - {gain_name}",
    )
    draw_centered(delta / const * 100, gs[1, 1], '%', "Percentage difference")
 ```

 %% Cell type:code id: tags:

 ``` python
 # pasha process context used by the per-module reductions in later cells
 # (presumably one worker per module; confirm against the pasha documentation).
 psh_ctx = psh.ProcessContext(nmods)
 ```

 %% Cell type:code id: tags:

 ``` python
 # For every constant except BadPixelsDark, and every gain stage / threshold:
 # average each module over its third per-module axis, stack the modules into
 # a detector-sized array and plot it next to the change from the previous
 # constant.
 gainstages = 1

 for const_name, const in constants.items():
    if const_name == 'BadPixelsDark':
        continue
    # Check if constant gain available in constant e.g. AGIPD, LPD
    if len(const.shape) == 5:
        gainstages = 3
    else:
        gainstages = 1

    display(Markdown(f'##### {const_name}'))
    # NOTE(review): `print_once` is not read anywhere in this cell.
    print_once = True
    for gain in range(gainstages):
        if const_name == 'ThresholdsDark':
            # Only the first two entries are thresholds with a name.
            if gain > 1:
                continue
            glabel = threshold_names[gain]
        else:
            glabel = gain_names[gain]

        # Arrays are sized for all `nmods` modules, not only the found ones.
        stacked_const = psh_ctx.alloc((nmods,) + module_shape, dtype=np.float64, fill=0)
        stacked_delta = psh_ctx.alloc((nmods,) + module_shape, dtype=np.float64, fill=0)

        def average_module(wid, i, _):
            qm = module_index_to_qm(i)
            if qm in mod_names:
                # Index of this module within the loaded constant arrays.
                m_idx = mod_names.index(qm)
                # Check if constant shape of 5 indices e.g. AGIPD, LPD
                if const.ndim == 5:
                    values = np.nanmean(const[m_idx, :, :, :, gain], axis=2)
                    prev_val = np.nanmean(prev_const[const_name][m_idx, :, :, :, gain], axis=2)
                else:
                    values = np.nanmean(const[m_idx, :, :, :], axis=2)
                    prev_val = np.nanmean(prev_const[const_name][m_idx, :, :, :], axis=2)
                # Exact zeros are turned into NaN before plotting.
                values[values == 0] = np.nan
                prev_val[prev_val == 0] = np.nan
                stacked_const[i] = np.moveaxis(values, 0, -1)
                stacked_delta[i] = np.moveaxis(values - prev_val, 0, -1)
            else:
                # if module not available fill space with nan
                # (stacked_delta keeps its fill value of 0 for this module)
                stacked_const[i] = np.nan

        psh_ctx.map(average_module, range(nmods))

        # Plotting constant overall modules.
        display(Markdown(f'###### {glabel} ######'))

        plot_const_and_delta(stacked_const, stacked_delta, const_name, glabel)

        plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Loop over modules and constants: for every constant except BadPixelsDark,
 # plot the per-module mean and standard deviation as a function of the
 # remaining (memory cell) axis, one figure per gain stage / threshold.
 for const_name, const in constants.items():
    if const_name == 'BadPixelsDark':
        continue  # Displayed separately below

    display(Markdown(f'### Summary across Modules - {const_name}'))

    # Bad pixels are per gain stage, and you need thresholds to pick the gain
    # stage, so we don't mask the thresholds.
    mask_bad_pixels = ('BadPixelsDark' in constants) and (const_name != 'ThresholdsDark')
    # Used in both axis labels so they always describe the data actually
    # plotted (the sigma label used to claim "good pixels only" for the
    # unmasked thresholds).
    pixel_selection = 'good pixels only' if mask_bad_pixels else 'good and bad pixels'

    for gain in range(gainstages):
        if const_name == 'ThresholdsDark':
            if gain == 2:
                continue
            glabel = threshold_names[gain]
        else:
            glabel = gain_names[gain]

        if const.ndim == 5:
            data = const[:, :, :, :, gain]
        else:
            data = const

        if mask_bad_pixels:
            if const.ndim == 5:
                goodpix = constants['BadPixelsDark'][:, :, :, :, gain] == 0
            else:
                goodpix = constants['BadPixelsDark'] == 0
        else:
            # `where=True` for every module, i.e. no masking.
            goodpix = [True] * data.shape[0]

        # Reduce data in parallel (one worker per module):
        datamean = psh_ctx.alloc(data.shape[:1] + data.shape[3:], data.dtype)
        datastd = psh_ctx.alloc(data.shape[:1] + data.shape[3:], data.dtype)

        def average_mem_cells(wid, i, _):
            datamean[i] = np.mean(data[i], axis=(0, 1), where=goodpix[i])
            datastd[i] = np.std(data[i], axis=(0, 1), where=goodpix[i])
        psh_ctx.map(average_mem_cells, range(data.shape[0]))

        fig = plt.figure(figsize=(15, 6), tight_layout={
                         'pad': 0.2, 'w_pad': 1.3, 'h_pad': 1.3})

        # Plot mean
        ax = fig.add_subplot(121)
        label = f'{const_name} value [ADU], {pixel_selection}'
        d = []
        for im, mod in enumerate(datamean):
            d.append({'x': np.arange(mod.shape[0]),
                      'y': mod,
                      'drawstyle': 'steps-pre',
                      'label': mod_names[im],
                      })

        _ = simplePlot(d, figsize=(10, 10), xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label=label,
                            use_axis=ax,
                            title=glabel,
                            title_position=[0.5, 1.18],
                            legend='outside-top-ncol6-frame', legend_size='18%',
                            legend_pad=0.00)

        # Plot standard deviation
        ax = fig.add_subplot(122)
        # Raw f-strings: '\s' is not a valid escape sequence in a normal string.
        label = rf'$\sigma$ {const_name} [ADU], {pixel_selection}'
        d = []
        for im, mod in enumerate(datastd):
            d.append({'x': np.arange(mod.shape[0]),
                      'y': mod,
                      'drawstyle': 'steps-pre',
                      'label': mod_names[im],
                      })

        _ = simplePlot(d, figsize=(10, 10), xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label=label,
                            use_axis=ax,
                            title=rf'{glabel} $\sigma$',
                            title_position=[0.5, 1.18],
                            legend='outside-top-ncol6-frame', legend_size='18%',
                            legend_pad=0.00)

        plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 if 'BadPixelsDark' in constants:
    display(Markdown('### Summary across Modules - BadPixelsDark'))

    bad_px_dark = constants['BadPixelsDark']
    # A fifth axis, when present, is indexed by gain stage below.
    has_gain_axis = bad_px_dark.ndim == 5

    for gain in range(gainstages):
        glabel = gain_names[gain]
        data = bad_px_dark[:, :, :, :, gain] if has_gain_axis else bad_px_dark[:, :, :, :]

        # Count flagged entries over axes 1 and 2 and normalise by their size.
        bad_px_per_cell = np.count_nonzero(data, axis=(1, 2))
        entries_per_cell = data.shape[1] * data.shape[2]
        fraction_bad_per_cell = bad_px_per_cell / entries_per_cell
        # Entries where everything is flagged are blanked out of the plot.
        fraction_bad_per_cell[fraction_bad_per_cell == 1.0] = np.nan

        fig = plt.figure(
            figsize=(15, 6),
            tight_layout={'pad': 0.2, 'w_pad': 1.3, 'h_pad': 1.3},
        )
        ax = fig.add_subplot(1, 1, 1)

        # One step curve per module.
        d = [
            {
                'x': np.arange(mod.shape[0]),
                'y': mod,
                'drawstyle': 'steps-pre',
                'label': mod_names[im],
            }
            for im, mod in enumerate(fraction_bad_per_cell)
        ]

        _ = simplePlot(
            d,
            figsize=(10, 10),
            xrange=(-12, 510),
            x_label='Memory Cell ID',
            y_label='Fraction of bad pixels',
            use_axis=ax,
            title=glabel,
            title_position=[0.5, 1.18],
            legend='outside-top-ncol6-frame',
            legend_size='18%',
            legend_pad=0.00,
        )

        plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables across Modules ##

 Tables show values averaged across all pixels and memory cells of a given detector module.

 %% Cell type:code id: tags:

 ``` python
 # Stop tabulate from escaping '$' and '\' in LaTeX output; the table cells
 # built further down contain LaTeX markup such as `$\pm$`.
 for _unescaped in (u'$', u'\\'):
    tabulate.LATEX_ESCAPE_RULES.pop(_unescaped, None)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Build one LaTeX table per constant (except BadPixelsDark) holding
 # "mean ± std" for every module and gain stage / threshold.
 head = ['Module', 'High gain', 'Medium gain', 'Low gain']
 head_th = ['Module', 'HG_MG threshold', 'MG_LG threshold']
 for const_name, const in constants.items():
    if const_name == 'BadPixelsDark':
        continue  # Handled below

    if const.ndim == 4:  # Add gain dimension if not present
        const = const[:, :, : , :, np.newaxis]

    # Thresholds are never masked; other constants are restricted to entries
    # whose BadPixelsDark value is 0 when that constant is available.
    if ('BadPixelsDark' in constants) and (const_name != 'ThresholdsDark'):
        goodpix = constants['BadPixelsDark'] == 0
        if goodpix.ndim == 4:
            goodpix = goodpix[:, :, : , :, np.newaxis]

        label = f'### Average {const_name} [ADU], good pixels only'
    else:
        # `where=True` for every module, i.e. no masking.
        goodpix = [True] * const.shape[0]
        label = f'### Average {const_name} [ADU], good and bad pixels'

    # Reduce data in parallel (one worker per module)
    mean_by_module_gain = psh.alloc(const.shape[:1] + const.shape[4:], const.dtype)
    std_by_module_gain  = psh.alloc(const.shape[:1] + const.shape[4:], const.dtype)

    def average_module(wid, i, _):
        # Reduce the first three per-module axes, keeping the last (gain) axis.
        mean_by_module_gain[i] = np.mean(const[i], axis=(0, 1, 2), where=goodpix[i])
        std_by_module_gain[i]  = np.std (const[i], axis=(0, 1, 2), where=goodpix[i])
    psh_ctx.map(average_module, range(const.shape[0]))

    table = []

    for i_mod, mod in enumerate(mod_names):
        t_line = [mod]
        for gain in range(gainstages):
            # Only the first two entries of ThresholdsDark are tabulated
            # (the header has two threshold columns).
            if const_name == 'ThresholdsDark' and gain == 2:
                continue

            datamean = mean_by_module_gain[i_mod, gain]
            datastd = std_by_module_gain[i_mod, gain]
            t_line.append(f'{datamean:6.1f} $\\pm$ {datastd:6.1f}')

        table.append(t_line)

    display(Markdown(label))
    header = head_th if const_name == 'ThresholdsDark' else head
    md = display(Latex(tabulate.tabulate(
        table, tablefmt='latex', headers=header)))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Bad pixels summary table: per module and gain stage, the number of entries
 # with a non-zero BadPixelsDark value and their fraction of all entries.
 if 'BadPixelsDark' in constants:
    bad_px_dark = constants['BadPixelsDark']

    # Defined up front so the heading never depends on the loops below having
    # run at least once (it would otherwise be undefined, or silently reuse a
    # `label` left over from an earlier cell).
    label = '## Number(fraction) of bad pixels'

    table = []

    for i_mod, mod in enumerate(mod_names):

        t_line = [mod]
        for gain in range(gainstages):
            # A fifth axis, when present, is indexed by gain stage.
            if bad_px_dark.ndim == 5:
                data = bad_px_dark[i_mod, :, :, :, gain]
            else:
                data = bad_px_dark[i_mod]

            datasum = np.count_nonzero(data)
            datamean = datasum / data.size

            t_line.append(f'{datasum:6.0f} ({datamean:6.3f}) ')

        table.append(t_line)

    display(Markdown(label))
    md = display(Latex(tabulate.tabulate(
        table, tablefmt='latex', headers=head)))
 ```

--- a/src/cal_tools/agipdlib.py
+++ b/src/cal_tools/agipdlib.py
 import os
 import posixpath
 import zlib
+from dataclasses import dataclass, field
 from datetime import datetime
+from logging import warning
 from multiprocessing import Manager
 from multiprocessing.pool import ThreadPool
 from typing import List, Optional
@@ -10,45 +12,40 @@ import h5py
 import numpy as np
 import sharedmem
 from dateutil import parser
-from extra_data import DataCollection, H5File, by_id, components
+from extra_data import DataCollection, H5File, RunDirectory, by_id, components

 from cal_tools import agipdalgs as calgs
 from cal_tools.agipdutils import (
    baseline_correct_via_noise,
    baseline_correct_via_stripe,
+    cast_array_inplace,
    correct_baseline_via_hist,
    correct_baseline_via_hist_asic,
    make_noisy_adc_mask,
    match_asic_borders,
    melt_snowy_pixels,
-    cast_array_inplace
 )
 from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
 from cal_tools.h5_copy_except import h5_copy_except_paths


+@dataclass
 class AgipdCtrl:
-    def __init__(
-        self,
-        run_dc: DataCollection,
-        image_src: str,
-        ctrl_src: str,
-        raise_error: bool = True,
-    ):
-        """ Initialize AgipdCondition class to read
-        all required AGIPD parameters.
-
-        :param run_dc: Run data collection with expected sources
-        to read needed parameters.
-        :param image_src: H5 source for image data.
-        :param ctrl_src: H5 source for control (slow) data.
-        :param raise_error: Boolean to raise errors for missing
-        sources and keys.
-        """
-        self.run_dc = run_dc
-        self.image_src = image_src
-        self.ctrl_src = ctrl_src
-        self.raise_error = raise_error
+    """Access AGIPD control parameters from a single run.
+
+    Args:
+        run_dc (DataCollection): Run data collection with expected sources
+            to read needed parameters.
+        image_src (str): H5 source for image data.
+        ctrl_src (str): H5 source for control (slow) data.
+        raise_error (bool): Boolean to raise errors for missing
+            sources and keys.
+        run: (int, optional): Run number.
+    """
+    run_dc: DataCollection
+    image_src: str
+    ctrl_src: str
+    raise_error: bool = False

    def _get_num_cells_ctrl(self) -> Optional[int]:
        """Get number of cells from CONTROL source."""
@@ -296,6 +293,171 @@ class AgipdCtrl:
        return 12


+@dataclass
+class AgipdCtrlRuns:
+    """Get AGIPD control parameters across several runs,
+    e.g. 3 runs for darks.
+
+    On initialization one `AgipdCtrl` is created per run (`run_ctrls`,
+    in the same order as `runs`) and the gain mode of every run is read
+    into `gain_modes`.
+
+    Args:
+        raw_folder (str): The RAW folder path.
+        runs (list): The list of runs to read the operating conditions.
+        image_src (str): H5 source for image data.
+        ctrl_src (str): H5 source for control (slow) data.
+        sort_dark_runs_enabled (bool): Call `sort_dark_runs()` at the end
+            of initialization. Defaults to False.
+    """
+    raw_folder: str
+    runs: List[int]
+    image_src: str
+    ctrl_src: str
+    sort_dark_runs_enabled: bool = False
+
+    # Unannotated, so these are shared class attributes and not
+    # dataclass fields. `gain_modes` is compared against them.
+    adaptive_gain_modes = [AgipdGainMode.ADAPTIVE_GAIN] * 3
+    fixed_gain_modes = [
+        AgipdGainMode.FIXED_HIGH_GAIN,
+        AgipdGainMode.FIXED_MEDIUM_GAIN,
+        AgipdGainMode.FIXED_LOW_GAIN,
+    ]
+
+    def __post_init__(self):
+        # One control-data reader per run, in the same order as `self.runs`.
+        self.run_ctrls = [
+            AgipdCtrl(
+                run_dc=RunDirectory(f"{self.raw_folder}/r{r:04d}"),
+                image_src=self.image_src,
+                ctrl_src=self.ctrl_src,
+                ) for r in self.runs]
+        self.gain_modes = self.get_gain_modes()
+        if self.sort_dark_runs_enabled:
+            self.sort_dark_runs()
+
+    def _validate_same_value(self, name, values):
+        """Log a warning if `values` are not identical for all runs.
+
+        Args:
+            name (str): Parameter name used in the warning message.
+            values (list): The parameter's value for each run,
+                in the order of `self.runs`.
+        """
+        if len(set(values)) != 1:
+            # Should we raise an error and stop processing?
+            warning(
+                f"{name} is not the same for all runs {self.runs}"
+                f" with values of {values}, respectively.")
+
+    def sort_dark_runs(self):
+        """Order dark runs based on run patterns for Adaptive mode
+        or gain modes for Fixed mode.
+
+        `runs`, `run_ctrls` and `gain_modes` are updated when the given
+        order differs from the sorted one.
+
+        Raises:
+            ValueError: The runs are a mix of adaptive and fixed gain
+                modes, or the pattern key to sort adaptive runs by
+                can not be derived from `ctrl_src`.
+        """
+        assert len(self.runs) == 3, f"AGIPD dark runs are expected to be 3. {len(self.runs)} runs are given."  # noqa
+        # Expected patterns:
+        # XRay: 0, DarkHG: 1, DarkMG: 2, DarkLG: 3, PC: 4 and CS: 5.
+        sort_by = None
+        sort_values = []
+        if self.gain_modes == self.adaptive_gain_modes:  # Adaptive gain # sort by patterns
+            # Patterns -> DarkHG: 1, DarkMG: 2, DarkLG: 3
+            if "AGIPD1M" in self.ctrl_src:
+                sort_by = "patternTypeIndex"
+            elif "AGIPD500K" in self.ctrl_src:
+                sort_by = "expTypeIndex"
+            else:
+                # Fail early instead of reading a `None` key below.
+                raise ValueError(
+                    "Can not select the pattern key to sort the runs by: "
+                    "neither 'AGIPD1M' nor 'AGIPD500K' is part of "
+                    f"ctrl_src '{self.ctrl_src}'.")
+
+            for c in self.run_ctrls:
+                sort_values.append(
+                    c.run_dc[self.ctrl_src, sort_by].as_single_value())
+
+        # Check if a mix of adaptive and fixed gain runs.
+        elif any(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in self.gain_modes):
+            raise ValueError(
+                f"Given runs {self.runs} have a mix of ADAPTIVE and "
+                f"FIXED gain modes: {self.gain_modes}.")
+        else:  # Fixed gain: Patterns is X-Ray: 0 for all runs.
+            sort_by = "gainModeIndex"
+            sort_values = [int(gm) for gm in self.gain_modes]
+
+        zipped_lists = zip(sort_values, self.runs, self.run_ctrls)
+
+        # Sort the lists based on the patterns
+        sorted_zipped_lists = sorted(zipped_lists, key=lambda item: item[0])
+        _, sorted_runs, sorted_run_ctrls = zip(*sorted_zipped_lists)
+        # `zip` yields tuples: convert before comparing, a tuple never
+        # equals a list and already sorted runs would be reported.
+        sorted_runs = list(sorted_runs)
+        if sorted_runs != list(self.runs):
+            warning("Given dark runs are unsorted. Runs will be sorted from"
+                    f" {self.runs} with {sort_by}:"
+                    f" {sort_values} to {sorted_runs}.")
+            # Update run_ctrls and runs order
+            self.runs = sorted_runs
+            self.run_ctrls = list(sorted_run_ctrls)
+            self.gain_modes = self.get_gain_modes()
+
+    def fixed_gain_mode(self):
+        """Check if runs are in fixed gain mode.
+
+        Raises:
+            ValueError: Unexpected gain modes for the dark runs
+
+        Returns:
+            bool: runs are in fixed gain mode.
+        """
+        if self.gain_modes == self.adaptive_gain_modes:
+            return False
+        elif self.gain_modes == self.fixed_gain_modes:
+            return True
+        else:
+            raise ValueError(f"Unexpected runs' gain modes: {self.gain_modes}")
+
+    def get_gain_modes(self):
+        """Get runs' gain modes.
+
+        Returns:
+            list: `AgipdGainMode`s, one per run in the order of `run_ctrls`.
+        """
+        return [c.get_gain_mode() for c in self.run_ctrls]
+
+    def get_integration_time(self):
+        """Get the integration time of the first run.
+
+        A warning is logged if the runs do not share the same value.
+
+        Returns:
+            float: Integration time
+        """
+        integration_times = [c.get_integration_time() for c in self.run_ctrls]
+        self._validate_same_value("Integration Time", integration_times)
+        return integration_times[0]
+
+    def get_bias_voltage(self, karabo_id_control: Optional[str] = None):
+        """Get the bias voltage of the first run.
+
+        A warning is logged if the runs do not share the same value.
+
+        Args:
+            karabo_id_control (str):
+                Karabo ID for control device.
+
+        Returns:
+            int: Bias voltage.
+        """
+        bias_voltages = [
+            c.get_bias_voltage(karabo_id_control) for c in self.run_ctrls]
+        self._validate_same_value("Bias Voltage", bias_voltages)
+        return bias_voltages[0]
+
+    def get_memory_cells(self):
+        """Get the number of memory cells of the first run.
+
+        A warning is logged if the runs do not share the same value.
+
+        Returns:
+            int: number of memory cells.
+        """
+        memory_cells = [c.get_num_cells() for c in self.run_ctrls]
+        self._validate_same_value("Memory cells", memory_cells)
+        return memory_cells[0]
+
+    def get_gain_setting(self, creation_time: Optional[datetime] = None):
+        """Get the gain setting of the first run.
+
+        A warning is logged if the runs do not share the same value.
+
+        Args:
+            creation_time (Optional[datetime], optional):
+                Creation time for the runs.
+
+        Returns:
+            float: Gain Setting
+        """
+        gain_settings = [
+            c.get_gain_setting(creation_time) for c in self.run_ctrls]
+        self._validate_same_value("Gain Setting", gain_settings)
+        return gain_settings[0]
+
+    def get_acq_rate(self):
+        """Get the acquisition rate of the first run.
+
+        A warning is logged if the runs do not share the same value.
+
+        Returns:
+            float: Acquisition rate
+        """
+        acquisition_rates = [c.get_acq_rate() for c in self.run_ctrls]
+        self._validate_same_value("acquisition_rate", acquisition_rates)
+        return acquisition_rates[0]
+
+
 class CellSelection:
    """Selection of detector memory cells (abstract class)"""
    row_size = 32

--- a/src/cal_tools/enums.py
+++ b/src/cal_tools/enums.py
@@ -48,7 +48,6 @@ class AgipdGainMode(IntEnum):

 class JungfrauSettings(Enum):
    """Jungfrau run gain settings."""
-    # old setting, new setting, new mode
    GAIN_0 = "gain0"
    HIGH_GAIN_0 = "highgain0"


--- a/src/cal_tools/jungfraulib.py
+++ b/src/cal_tools/jungfraulib.py
-from typing import Optional, Tuple
+from logging import warning
+from typing import Tuple

 import extra_data

-from cal_tools.enums import JungfrauGainMode, JungfrauSettings
+from cal_tools.enums import JungfrauGainMode as JGM
+from cal_tools.enums import JungfrauSettings


 def _old_settings_to_new(settings: str, index: int) -> str:
@@ -98,22 +100,112 @@ class JungfrauCtrl():
        else:  # JungfrauSettings.GAIN_0
            return 0

-    def get_gain_mode(self) -> int:
-        """Get gain mode value. Fixed `1` or Adaptive `1`.
-        - `0` if run_mode = dynamic, forceswitchg1, forceswitchg2, or None.
-        - `1` if run_mode = fixg1 or fixg2.
-        """
+    def get_gain_mode_str(self):
+        """Get the gain mode of the run as stored in `run_mode`.
+
+        Returns:
+            `run_mode` unchanged if it is one of the `JungfrauGainMode`
+            values, otherwise the result of
+            `_old_settings_to_new(run_mode, 1)`.
+        """
        # Check if run_mode is of an old settings to convert
        # into new mode value.
-        if self.run_mode in [m.value for m in JungfrauGainMode]:
-            mode = self.run_mode
+        if self.run_mode in [m.value for m in JGM]:
+            return self.run_mode
        else:
-            mode = _old_settings_to_new(self.run_mode, 1)
-        
-        if mode in [
-            JungfrauGainMode.FIX_GAIN_1.value,
-            JungfrauGainMode.FIX_GAIN_2.value,
-        ]:
+            return _old_settings_to_new(self.run_mode, 1)
+
+    def get_gain_mode(self) -> int:
+        """Get gain mode value. Fixed `1` or Adaptive `0`.
+
+        Returns:
+            (int): gain mode parameter condition. `1` if the gain mode
+                is the value of FIX_GAIN_1 or FIX_GAIN_2, otherwise `0`.
+        """
+        # Despite its name, `gm_enum` is the value returned by
+        # get_gain_mode_str(); it is compared against enum `.value`s.
+        gm_enum = self.get_gain_mode_str()
+
+        if gm_enum in [JGM.FIX_GAIN_1.value, JGM.FIX_GAIN_2.value]:
            return 1
-        else:  # DYNAMIC, FORCE_SWITCH_G1, or FORCE_SWITCH_G2
+        else:  # DYNAMIC, FORCE_SWITCH_G1, FORCE_SWITCH_G2 or None
            return 0
+
+
+# Sort key per gain mode value: the default `modes_order` that
+# `sort_runs_by_gain` uses to order the given dark runs.
+MODES_ORDER = {
+    JGM.DYNAMIC.value: 0,
+    JGM.FORCE_SWITCH_HG1.value: 1,
+    JGM.FORCE_SWITCH_HG2.value: 2,
+    JGM.FIX_GAIN_1.value: 3,
+    JGM.FIX_GAIN_2.value: 4,
+}
+# Gain mode sequences accepted for the sorted dark runs: the default
+# `expected_run_order` that `sort_runs_by_gain` validates against.
+EXPECTED_RUN_ORDER = [
+    [  # Adaptive operation mode pattern
+        JGM.DYNAMIC.value,
+        JGM.FORCE_SWITCH_HG1.value,
+        JGM.FORCE_SWITCH_HG2.value
+    ],
+    [  # Fixed operation mode pattern
+        JGM.DYNAMIC.value,
+        JGM.FIX_GAIN_1.value,
+        JGM.FIX_GAIN_2.value
+    ],
+]
+
+
+def sort_runs_by_gain(
+    raw_folder,
+    runs,
+    ctrl_src,
+    modes_order=MODES_ORDER,
+    expected_run_order=EXPECTED_RUN_ORDER
+):
+    """Sort and validate the 3 dark runs given for Jungfrau.
+
+    The gain mode of every run is read with
+    `JungfrauCtrl.get_gain_mode_str()`, the runs are sorted by
+    `modes_order` and the sorted gain modes are checked against
+    `expected_run_order`.
+
+    Args:
+        raw_folder (str): RAW folder for the validated dark runs.
+        runs (list): [High run, Medium run, Low run].
+        ctrl_src (str): Control source path for slow data.
+        modes_order (dict): Gain modes order to sort the runs by.
+        expected_run_order (list): Expected dark runs order to process.
+
+    Returns:
+        list: The runs sorted by gain mode. `runs` is returned as given
+            if the gain mode is None for all of the runs.
+
+    Raises:
+        ValueError: Wrong given dark runs
+    """
+    assert len(runs) == 3, "Wrong number of runs. expected a list of 3 runs."
+
+    run_gm_mapping = dict()
+    for run in runs:
+        ctrl_data = JungfrauCtrl(
+            extra_data.RunDirectory(f"{raw_folder}/r{run:04d}/"),
+            ctrl_src)
+        gm = ctrl_data.get_gain_mode_str()
+        run_gm_mapping[run] = gm
+
+    # 1st legacy case before having run.settings in data.
+    if all(value is None for value in run_gm_mapping.values()):
+        warning("run.settings is not stored in the data "
+                f"to read. Hence assuming gain_mode = {gm}"
+                " for adaptive old data.")
+        return runs
+
+    # A gain mode without a sort key would otherwise fail with a bare
+    # KeyError raised from the sort key function below.
+    unsortable = [
+        mode for mode in run_gm_mapping.values() if mode not in modes_order]
+    if unsortable:
+        raise ValueError(
+            "Wrong dark runs are given. "
+            f"The given three runs are {runs} with gain modes "
+            f"{list(run_gm_mapping.values())}, of which {unsortable} "
+            "can not be sorted.")
+
+    run_gm_mapping = dict(sorted(
+        run_gm_mapping.items(),
+        key=lambda item: modes_order[item[1]]
+        ))
+    # Compare lists with lists: `runs` may be given as a tuple.
+    if list(run_gm_mapping.keys()) != list(runs):
+        warning("Given dark runs are unsorted. "
+                f"Runs will be sorted from {runs} of gain modes "
+                f"{list(run_gm_mapping.values())} to "
+                f"{list(run_gm_mapping.keys())}")
+
+    runs = list(run_gm_mapping.keys())
+    modes = list(run_gm_mapping.values())
+
+    legacy_adaptive = [
+        JGM.DYNAMIC.value,
+        JGM.DYNAMIC.value,
+        JGM.FORCE_SWITCH_HG1.value
+    ]
+
+    # 2nd legacy case with CTRL/MDL bug resulting in wrong run settings.
+    if modes == legacy_adaptive:
+        warning(f"run.settings for medium and low gain runs"
+                f" are wrong {modes[1:]}. This is an expected "
+                f"bug for old raw data. "
+                "Assuming this is an adaptive gain runs.")
+    elif modes not in expected_run_order:
+        raise ValueError("Wrong dark runs are given. "
+                         f"The given three runs are {runs} with "
+                         f"wrong gain modes {modes}. "
+                         "Please verify the selected 3 dark runs to process.")
+
+    return runs
--- a/src/xfel_calibrate/notebooks.py
+++ b/src/xfel_calibrate/notebooks.py
@@ -234,6 +234,12 @@ notebooks = {
                            "use function": "balance_sequences",
                            "cluster cores": 4},
        },
+        "FF": {
+            "notebook": "notebooks/ePix100/Characterize_FlatFields_ePix100_NBC.ipynb",
+            "concurrency": {"parameter": None,
+                            "default concurrency": None,
+                            "cluster cores": 4},
+        },
    },
    "EPIX10K": {
        "DARK": {

--- a/tests/test_agipdlib.py
+++ b/tests/test_agipdlib.py
 from datetime import datetime

+import pytest
 from extra_data import RunDirectory

-from cal_tools.agipdlib import AgipdCtrl
+from cal_tools.agipdlib import AgipdCtrl, AgipdCtrlRuns
+from cal_tools.enums import AgipdGainMode

 SPB_AGIPD_INST_SRC = 'SPB_DET_AGIPD1M-1/DET/0CH0:xtdf'
-CTRL_SRC = 'SPB_IRU_AGIPD1M1/MDL/FPGA_COMP'
+SPB_AGIPD_KARABO_CTRL_ID = 'SPB_IRU_AGIPD1M1'
+CTRL_SRC = f'{SPB_AGIPD_KARABO_CTRL_ID}/MDL/FPGA_COMP'


 def test_get_acq_rate_ctrl(mock_agipd1m_run):
@@ -136,8 +139,8 @@ def test_get_bias_voltage(
    # Read bias voltage for HED_DET_AGIPD500K from slow data.
    agipd_ctrl = AgipdCtrl(
        run_dc=RunDirectory(mock_agipd500k_run),
-        image_src=SPB_AGIPD_INST_SRC,
-        ctrl_src=CTRL_SRC)
+        image_src=None,
+        ctrl_src=None)
    bias_voltage = agipd_ctrl.get_bias_voltage(
        karabo_id_control="HED_EXP_AGIPD500K2G")

@@ -198,3 +201,128 @@ def test_get_gain_mode(mock_agipd1m_run):
    gain_mode = agipd_ctrl.get_gain_mode()
    assert isinstance(gain_mode, int)
    assert gain_mode == 0
+
+
+"""Testing `AgipdCtrlRuns`"""
+
+TEST_RAW_FOLDER = "/gpfs/exfel/exp/CALLAB/202130/p900203/raw/"
+SPB_FIXED_RUNS = [9011, 9012, 9013]
+SPB_ADAPTIVE_RUNS = [9015, 9016, 9017]
+
+
+# The AgipdCtrlRuns objects are built in module-scoped fixtures instead of
+# at import time: creating one opens the run directories below
+# TEST_RAW_FOLDER, which would break the collection of this whole module
+# (including the tests on mocked data) where GPFS is not available.
+@pytest.fixture(scope="module")
+def fixed_ctrl_runs():
+    """`AgipdCtrlRuns` for SPB_FIXED_RUNS, created once per module."""
+    return AgipdCtrlRuns(
+        raw_folder=TEST_RAW_FOLDER,
+        runs=SPB_FIXED_RUNS,
+        image_src=SPB_AGIPD_INST_SRC,
+        ctrl_src=CTRL_SRC,
+    )
+
+
+@pytest.fixture(scope="module")
+def adaptive_ctrl_runs():
+    """`AgipdCtrlRuns` for SPB_ADAPTIVE_RUNS, created once per module."""
+    return AgipdCtrlRuns(
+        raw_folder=TEST_RAW_FOLDER,
+        runs=SPB_ADAPTIVE_RUNS,
+        image_src=SPB_AGIPD_INST_SRC,
+        ctrl_src=CTRL_SRC,
+    )
+
+
+@pytest.mark.requires_gpfs
+def test_get_memory_cells_runs(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.get_memory_cells() == 352
+
+    assert adaptive_ctrl_runs.get_memory_cells() == 352
+
+
+@pytest.mark.requires_gpfs
+def test_get_bias_voltage_runs(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.get_bias_voltage(SPB_AGIPD_KARABO_CTRL_ID) == 300
+
+    assert adaptive_ctrl_runs.get_bias_voltage(SPB_AGIPD_KARABO_CTRL_ID) == 300
+
+
+@pytest.mark.requires_gpfs
+def test_get_integration_time_runs(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.get_integration_time() == 12
+
+    assert adaptive_ctrl_runs.get_integration_time() == 20
+
+
+@pytest.mark.requires_gpfs
+def test_get_acquisition_rate_runs(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.get_acq_rate() == 1.1
+
+    assert adaptive_ctrl_runs.get_acq_rate() == 1.1
+
+
+@pytest.mark.requires_gpfs
+def test_get_gain_setting_runs(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.get_gain_setting() == 0
+
+    assert adaptive_ctrl_runs.get_gain_setting() == 0
+
+
+@pytest.mark.requires_gpfs
+def test_get_gain_mode_runs(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.get_gain_modes() == [
+        AgipdGainMode.FIXED_HIGH_GAIN,
+        AgipdGainMode.FIXED_MEDIUM_GAIN,
+        AgipdGainMode.FIXED_LOW_GAIN
+    ]
+
+    assert adaptive_ctrl_runs.get_gain_modes() == [
+        AgipdGainMode.ADAPTIVE_GAIN]*3
+
+
+@pytest.mark.requires_gpfs
+def test_fixed_gain_mode(fixed_ctrl_runs, adaptive_ctrl_runs):
+    assert fixed_ctrl_runs.fixed_gain_mode()
+
+    assert not adaptive_ctrl_runs.fixed_gain_mode()
+
+
+@pytest.mark.requires_gpfs
+def test_raise_fixed_gain_mode():
+    """`fixed_gain_mode()` raises ValueError for runs 9011, 9016 and 9017."""
+    mixed_runs = [9011, 9016, 9017]
+    ctrl_runs = AgipdCtrlRuns(
+        raw_folder=TEST_RAW_FOLDER,
+        runs=mixed_runs,
+        image_src=SPB_AGIPD_INST_SRC,
+        ctrl_src=CTRL_SRC,
+    )
+
+    with pytest.raises(ValueError):
+        ctrl_runs.fixed_gain_mode()
+
+
+@pytest.mark.requires_gpfs
+@pytest.mark.parametrize(
+    "runs,expected",
+    [
+        ([9013, 9011, 9012], [9011, 9012, 9013]),
+        ([9017, 9016, 9015], [9015, 9016, 9017]),
+    ],
+)
+def test_sort_dark_runs(runs, expected):
+    """Unsorted dark runs end up in the expected order after sorting."""
+    ctrl_kwargs = dict(
+        raw_folder=TEST_RAW_FOLDER,
+        runs=runs,
+        image_src=SPB_AGIPD_INST_SRC,
+        ctrl_src=CTRL_SRC,
+    )
+    unsorted_ctrls = AgipdCtrlRuns(**ctrl_kwargs)
+
+    # Sorting is not enabled at construction; trigger it explicitly.
+    unsorted_ctrls.sort_dark_runs()
+
+    assert unsorted_ctrls.runs == expected
+
+
+@pytest.mark.requires_gpfs
+def test_raise_sort_dark_runs():
+    """Sorting runs 9011, 9016 and 9017 raises ValueError, both when
+    enabled at construction and when called explicitly.
+    """
+    # Sorting during initialization.
+    with pytest.raises(ValueError):
+        AgipdCtrlRuns(
+            raw_folder=TEST_RAW_FOLDER,
+            runs=[9011, 9016, 9017],
+            image_src=SPB_AGIPD_INST_SRC,
+            ctrl_src=CTRL_SRC,
+            sort_dark_runs_enabled=True
+        )
+
+    # Sorting explicitly after an initialization without sorting.
+    adaptive_fixed_ctrls = AgipdCtrlRuns(
+        raw_folder=TEST_RAW_FOLDER,
+        runs=[9011, 9016, 9017],
+        image_src=SPB_AGIPD_INST_SRC,
+        ctrl_src=CTRL_SRC,
+    )
+    with pytest.raises(ValueError):
+        adaptive_fixed_ctrls.sort_dark_runs()
--- a/tests/test_jungfraulib.py
+++ b/tests/test_jungfraulib.py
 import pytest
 from extra_data import RunDirectory

-from cal_tools.jungfraulib import JungfrauCtrl
+from cal_tools.jungfraulib import JungfrauCtrl, sort_runs_by_gain

 # TODO: replace with mocked RAW data as in tests/test_agipdlib.py
 JF = JungfrauCtrl(
@@ -45,3 +45,22 @@ def test_get_gain_setting(settings, result):
 def test_get_gain_mode(mode, result):
    JF.run_mode = mode
    assert JF.get_gain_mode() == result
+
+# TODO: missing fixed gain dark runs for JUNGFRAU from test proposal.
+# TODO: missing fixed and adaptive runs after the JF control updated.
+@pytest.mark.requires_gpfs
+@pytest.mark.parametrize(
+    'original_runs,sorted_runs',
+    [
+        ([9035, 9036, 9037], [9035, 9036, 9037]),
+        ([9035, 9037, 9036], [9035, 9036, 9037]),
+        ([9033, 9032, 9031], [9031, 9032, 9033]),
+        ([9033, 9031, 9032], [9031, 9032, 9033]),
+    ]
+)
+def test_sort_runs_by_gain(original_runs, sorted_runs):
+    """`sort_runs_by_gain` returns the given runs in the expected order.
+
+    Marked `requires_gpfs` as the runs are read from a GPFS RAW folder.
+    """
+    raw_folder = "/gpfs/exfel/exp/CALLAB/202130/p900203/raw"
+    validated_runs = sort_runs_by_gain(
+        raw_folder=raw_folder,
+        runs=original_runs,
+        ctrl_src="FXE_XAD_JF1M/DET/CONTROL")
+    assert validated_runs == sorted_runs
--- a/tests/test_reference_runs/callab_tests.py
+++ b/tests/test_reference_runs/callab_tests.py
@@ -25,9 +25,10 @@ automated_test_config = {
            "out-folder": "{}/{}/{}",
            # "/gpfs/exfel/exp/SPB/202131/p900215/raw"
            "in-folder": "/gpfs/exfel/exp/CALLAB/202130/p900203/raw",
-            "run-high": "9011",  # Original run: "91"
+            # Unsorted dark runs
+            "run-high": "9013",  # Original run "93"
            "run-med": "9012",  # Original run: "92"
-            "run-low": "9013",  # Original run "93"
+            "run-low": "9011",  # Original run: "91"
            "karabo-id-control": "SPB_IRU_AGIPD1M1",
            "karabo-id": "SPB_DET_AGIPD1M-1",
            "ctrl-source-template": "{}/MDL/FPGA_COMP",
@@ -173,9 +174,10 @@ automated_test_config = {
            "out-folder": "{}/{}/{}",
            # "/gpfs/exfel/exp/HED/202131/p900228/raw"
            "in-folder": "/gpfs/exfel/exp/CALLAB/202130/p900203/raw",
+            # Unsorted dark runs
            "run-high": "9023", # Original run: "25",
-            "run-med": "9024",  # Original run: "26",
-            "run-low": "9025",  # Original run: "27",
+            "run-med": "9025",  # Original run: "27",
+            "run-low": "9024",  # Original run: "26",
            "karabo-id-control": "HED_EXP_AGIPD500K2G",
            "karabo-id": "HED_DET_AGIPD500K2G",
            "ctrl-source-template": "{}/MDL/FPGA_COMP",
@@ -379,9 +381,10 @@ automated_test_config = {
            "out-folder": "{}/{}/{}",
            # "/gpfs/exfel/exp/HED/202102/p002656/raw"
            "in-folder": "/gpfs/exfel/exp/CALLAB/202130/p900203/raw",
-            "run-high": "9039",  # Original run: "219",
-            "run-med": "9040",  # Original run: "220",
-            "run-low": "9041",  # Original run: "221",
+            # The 3 runs are given in the wrong order on purpose.
+            "run-high": "9040",  # Original run: "220",
+            "run-med": "9041",  # Original run: "221",
+            "run-low": "9039",  # Original run: "219",
            "karabo-id": "HED_IA1_JF500K2",
            "karabo-da": "JNGFR02",
        },

--- a/webservice/webservice.py
+++ b/webservice/webservice.py
@@ -1283,7 +1283,7 @@ class ActionsServer:
            # Notebooks require one or three runs, depending on the
            # detector type and operation mode.
            triple = any(det in karabo_id for det in
-                         ["LPD", "AGIPD", "JUNGFRAU", "JF", "JNGFR", "GH2"])
+                         ["LPD", "AGIPD", "JUNGFRAU", "JF", "JNGFR", "GH2", "G2"])

            # This fails silently if the hardcoded strings above are
            # ever changed (triple = False) but the underlying notebook
No results found