%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 225 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
mem_cells = 0 # Number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity, no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels required to consider a module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered dark
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask complete adc
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
# Correction Booleans
only_offset = False # Apply only the offset correction. If False, offset is applied by default together with the other selected corrections; if True, only offset is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask an entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
round_photons = False # Round to absolute number of photons, only use with gain corrections
# Optional auxiliary devices
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
litframe_device_id = '' # Device ID for a lit-frame finder device, empty string for auto-detection
energy_threshold = -1000 # The lower limit for the pulse energy (uJ) of frames subject to processing. If -1000, selection by pulse energy is disabled
use_xgm_device = '' # DoocsXGM device ID to obtain the actual photon energy; otherwise the operating-condition value is used.
# Output parameters
recast_image_data = '' # Cast data to a different dtype before saving
compress_fields = ['gain', 'mask'] # Datasets in image group to compress.
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
max_tasks_per_worker = -1 # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
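%% Cell type:markdown id: tags:
The `max_pulses` parameter above follows a `[start, stop, step]` convention for selecting memory-cell indices within a train. As a rough, hypothetical illustration of that convention (the actual selection is performed by the `CellRange`/`LitFrameSelection` helpers used further below):
%% Cell type:code id: tags:
``` python
# Minimal sketch, assuming the [start, stop, step] triple is interpreted
# like Python's range(); example_range mirrors the default of max_pulses.
example_range = [0, 352, 1]
selected_cells = list(range(*example_range))
print(f"{len(selected_cells)} memory cells selected "
      f"({selected_cells[0]}..{selected_cells[-1]})")
```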
%% Cell type:code id: tags:
``` python
import itertools
import os
import math
import multiprocessing
import re
import traceback
import warnings
from datetime import timedelta
from logging import warn
from pathlib import Path
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import H5File, RunDirectory, stack_detector_data, by_id
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.step_timing import StepTimer
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
corr_bools["round_photons"] = round_photons
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules:", ', '.join(cal_tools.tools.module_index_to_qm(x) for x in modules))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
if use_ppu_device:
# Obtain trains to process if using a pulse picker device.
# Will throw an uncaught exception if the device is wrong.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
train_ids = np.unique(seq_start)[1:] + ppu_train_offset
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# Process all trains.
train_ids = None
print(f'Processing all valid trains')
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = cal_tools.tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
mod_channel = int(re.findall(rf".*{first_mod_channel}CH([0-9]+):.*", instrument_src_mod)[0])
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
if acq_rate == 0.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == 0.:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: {photon_energy}")
print(f"• Photon Energy: 9.2")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
print(f"Warning: {to_disable} correction was requested, but does not apply to fixed gain mode")
warn(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_finder != 'off':
from extra_redu import make_litframe_finder, LitFrameFinderError
from extra_redu.litfrm.utils import litfrm_run_report
if use_litframe_finder not in ['auto', 'offline', 'online']:
raise ValueError("Unexpected value in 'use_litframe_finder'.")
inst = karabo_id_control[:3]
litfrm = make_litframe_finder(inst, dc, litframe_device_id)
try:
if use_litframe_finder == 'auto':
r = litfrm.read_or_process()
elif use_litframe_finder == 'offline':
r = litfrm.process()
elif use_litframe_finder == 'online':
r = litfrm.read()
report = litfrm_run_report(r)
print("Lit-frame patterns:")
print(" # trains Np Nd Nf lit frames")
for rec in report:
frmintf = ', '.join(
[':'.join([str(n) for n in slc]) for slc in rec['frames']]
)
trsintf = ':'.join([str(n) for n in rec['trains']])
print(
("{pattern_no:2d} {trsintf:25s} {npulse:4d} "
"{ndataframe:3d} {nframe:3d} [{frmintf}]"
).format(frmintf=frmintf, trsintf=trsintf, **rec)
)
cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold)
except LitFrameFinderError as err:
print("Cannot use AgipdLitFrameFinder due to:")
print(err)
warn(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:code id: tags:
``` python
actual_photon_energy = None

if use_xgm_device:
    # Try to obtain the actual photon energy from the XGM device.
    wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']
    try:
        from scipy.constants import h, c, e

        # Read wavelength as a single value and convert to hv.
        actual_photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
        print(f'Obtained actual photon energy {actual_photon_energy:.3f} keV from {use_xgm_device}')
    except ValueError:
        if round_photons:
            warn('XGM source available but actual photon energy varies greater than 1%, '
                 'photon rounding disabled!')
            round_photons = False

if round_photons and photon_energy <= 0.0:
    if actual_photon_energy is not None:
        # Use the XGM-derived value as the photon energy for rounding as well.
        photon_energy = actual_photon_energy
        print(f'Using photon energy {photon_energy:.3f} keV from {use_xgm_device} for rounding')
    else:
        warn('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
        round_photons = False
elif round_photons:
    print(f'Photon energy for rounding: {photon_energy:.3f} keV')

if round_photons and actual_photon_energy is None:
    warn('Using operating condition for actual photon energy in photon rounding mode, this is NOT reliable!')
    actual_photon_energy = photon_energy
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
agipd_corr.actual_photon_energy = actual_photon_energy
agipd_corr.photon_energy = photon_energy
agipd_corr.compress_fields = compress_fields
if recast_image_data:
agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
```
%% Cell type:code id: tags:
``` python
module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = cal_tools.tools.CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
err = ""
k_da = module_index_to_karabo_da[mod]
try:
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
print(f"Found constants for {k_da} in calibration_metadata.yml")
else:
# TODO: replace with proper retrieval (as done in pre-correction)
when = agipd_corr.initialize_from_db(
karabo_id=karabo_id,
karabo_da=k_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
memory_cells=mem_cells_db,
bias_voltage=bias_voltage,
photon_energy=photon_energy,
gain_setting=gain_setting,
acquisition_rate=acq_rate,
integration_time=integration_time,
module_idx=mod,
only_dark=False,
)
print(f"Queried CalCat for {k_da}")
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, k_da
print(f'Preparing constants (FF: {agipd_corr.corr_bools.get("xray_corr", False)}, PC: {any(agipd_corr.pc_bools)}, '
f'BLC: {any(agipd_corr.blc_bools)})')
ts = perf_counter()
with multiprocessing.Pool(processes=len(modules)) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
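%% Cell type:markdown id: tags:
For illustration, a minimal (hypothetical) use of `batches`, showing that the last batch may be shorter than `batch_size`:
%% Cell type:code id: tags:
``` python
# Sketch: group five file names into batches of two; the last batch has one element.
example_files = [f"file_{i}.h5" for i in range(5)]
print(list(batches(example_files, 2)))
# [['file_0.h5', 'file_1.h5'], ['file_2.h5', 'file_3.h5'], ['file_4.h5']]
```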
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
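%% Cell type:markdown id: tags:
As a quick, illustrative sanity check of the chunking arithmetic (assuming the default `chunk_size` of 1000 set above), 2500 loaded images in file slot 0 would be split into three nearly equal chunks:
%% Cell type:code id: tags:
``` python
# Sketch only: expected output for chunk_size == 1000.
print(list(imagewise_chunks([2500])))
# [(0, 0, 833), (0, 833, 1666), (0, 1666, 2500)]
```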
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
if max_tasks_per_worker == -1:
max_tasks_per_worker = None
with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
step_timer.done_step(f'Loading data from files')
if not np.any(img_counts):
# Skip any further processing and output if there are no images to
# correct in this file.
continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# If common mode correction is enabled.
# Cell selection is only activated after common mode correction.
# Perform cross-file correction parallel over asics
pool.starmap(agipd_corr.cm_correction, itertools.product(
range(len(file_batch)), range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise correction"
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# If the yml file contains "retrieved-constants", a precorrection notebook was
# already run and the reporting below is generated from it.
fst_print = True
timestamps = {}
for i, (error, modno, when, k_da) in enumerate(const_out):
qm = cal_tools.tools.module_index_to_qm(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
if k_da not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
module_timestamps = {}
# Print the constants' timestamps if the correction did not fail
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store a few timestamps if they exist,
# and add "NA" to keep the array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
module_timestamps[key] = when[key]
else:
if error is not None:
module_timestamps[key] = "Err"
else:
module_timestamps[key] = "NA"
timestamps[qm] = module_timestamps
seq = sequences[0] if sequences else 0
if timestamps:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
"""Load single train for all module
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
run_data = RunDirectory(data_folder, include)
if tid is not None:
tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)
else:
tid, data = next(iter(run_data.select(f'{detector_id}/DET/*', source).trains(require_all=True)))
# TODO: remove and use the keep_dims version after updating Extra-data.
# Avoid using default axis with sources of an expected scalar value per train.
nfrm = cell_sel.get_cells_on_trains([tid]).sum()
if nfrm == 1 and source in ['image.blShift', 'image.cellId', 'image.pulseId']:
axis = 0
else:
axis = -3
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules, axis=axis)
# Add cellId dimension when correcting one cellId only.
# avoid adding pulse dims for raw data.
if (nfrm == 1 and data_folder != run_folder):
stacked_data = stacked_data[np.newaxis, ...]
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows the signal versus the digitized gain value.
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
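%% Cell type:markdown id: tags:
As a minimal sketch of how such a bit mask can be queried (illustrative only; `is_bad` is a hypothetical helper, not part of `cal_tools`):
%% Cell type:code id: tags:
``` python
import numpy as np
from cal_tools.enums import BadPixels

def is_bad(mask_array, bad_pixel_type):
    """Return a boolean array that is True where the given BadPixels bit is set."""
    return (mask_array & bad_pixel_type.value) != 0

# Dummy 32-bit mask with one clean and one flagged entry.
dummy_mask = np.array([0, BadPixels.NO_DARK_DATA.value], dtype=np.uint32)
print(is_bad(dummy_mask, BadPixels.NO_DARK_DATA))  # [False  True]
```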
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:code id: tags:
``` python
if round_photons:
display(Markdown('### Photonization histograms ###'))
x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)
fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
ax.set_yscale('log')
ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
ax.set_xlim(x_postround[0], x_postround[-1]+1)
ax.set_xlabel('Photon energy / keV')
ax.set_ylabel('Intensity')
ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')
phx = ax.twiny()
phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
phx.set_xticks(x_photons)
phx.set_xlabel('# Photons')
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
# AGIPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # runs to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
ctrl_source_template = '{}/MDL/FPGA_COMP_TEST' # path to control information
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
receiver_template = "{}CH0" # inset for receiver devices
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
calfile = "" # path to calibration file. Leave empty if all data should come from DB
nodb = False # if set only file-based constants will be used
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
photon_energy = 9.2 # photon energy in keV
integration_time = -1 # integration time, negative values for auto-detection.
# Correction Booleans
only_offset = False # Apply only the offset correction. If False, offset is applied by default together with the other selected corrections; if True, only offset is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
```
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_hmatch"] = blc_hmatch
```
%% Cell type:code id: tags:
``` python
from pathlib import Path
from typing import List, Tuple
import matplotlib
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
from datetime import timedelta
from dateutil import parser
from extra_data import RunDirectory
matplotlib.use("agg")
from cal_tools import agipdlib, tools
from cal_tools.enums import AgipdGainMode
from iCalibrationDB import Conditions, Constants, Detectors
```
%% Cell type:code id: tags:
``` python
# slopes_ff_from_files left as str for now
in_folder = Path(in_folder)
out_folder = Path(out_folder)
metadata = tools.CalibrationMetadata(metadata_folder or out_folder)
```
%% Cell type:code id: tags:
``` python
creation_time = None
if use_dir_creation_date:
creation_time = tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Using {creation_time} as creation time")
if sequences[0] == -1:
sequences = None
print(f"Outputting to {out_folder}")
out_folder.mkdir(parents=True, exist_ok=True)
melt_snow = False if corr_bools["only_offset"] else agipdlib.SnowResolution.NONE
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
print(f"Detector in use is {karabo_id}")
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
# Evaluate detector instance for mapping
if instrument == "SPB":
nmods = 16
elif instrument == "MID":
nmods = 16
elif instrument == "HED":
nmods = 8
print(f"Instrument {instrument}")
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}")
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
instr_dc = run_dc.select(instrument_src.format("*"), require_all=True)
if not instr_dc.train_ids:
raise ValueError(f"No images found for {in_folder / f'r{run:04d}'}")
```
%% Cell type:code id: tags:
``` python
agipd_cond = agipdlib.AgipdCtrl(
run_dc=run_dc,
image_src=None, # Not needed, as we won't read mem_cells or acq_rate.
ctrl_src=ctrl_src,
)
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:markdown id: tags:
## Retrieve Constants ##
%% Cell type:code id: tags:
``` python
def retrieve_constants(
k_da: str, idx: int
) -> Tuple[dict, str, float, int]:
"""
Retrieve constants for a module.
:return:
mdata_dict: (dict) dictionary with the metadata for the retrieved constants.
k_da: karabo data aggregator.
acq_rate: acquisition rate parameter.
mem_cells: number of memory cells.
"""
# check if this module has images to process.
if instrument_src.format(idx) not in instr_dc.all_sources:
print("ERROR: No raw images found for "
f"{tools.module_index_to_qm(idx)}({k_da}).")
return None, k_da, None, None
agipd_cond.image_src = instrument_src.format(idx)
if mem_cells == 0:
# Read value from fast data.
local_mem_cells = agipd_cond.get_num_cells()
else:
# or use overriding notebook parameter.
local_mem_cells = mem_cells
if acq_rate == 0.:
local_acq_rate = agipd_cond.get_acq_rate()
else:
local_acq_rate = acq_rate
# avoid retrieving constant, if requested.
if nodb_with_dark:
return None, k_da, None, None
const_dict = agipdlib.assemble_constant_dict(
corr_bools,
pc_bools,
local_mem_cells,
bias_voltage,
gain_setting,
local_acq_rate,
photon_energy,
gain_mode=gain_mode,
beam_energy=None,
only_dark=only_dark,
integration_time=integration_time
)
# Retrieve multiple constants through an input dictionary
# to return a dict of useful metadata.
mdata_dict = dict()
mdata_dict["constants"] = dict()
mdata_dict["physical-detector-unit"] = None # initialization
for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items(): # noqa
if gain_mode and const_name in ("ThresholdsDark",):
continue
# saving metadata in a dict
const_mdata = dict()
mdata_dict["constants"][const_name] = const_mdata
if slopes_ff_from_files and const_name in ["SlopesFF", "BadPixelsFF"]:
const_mdata["file-path"] = (
f"{slopes_ff_from_files}/slopesff_bpmask_module_{tools.module_index_to_qm(idx)}.h5") # noqa
const_mdata["creation-time"] = "00:00:00"
continue
if gain_mode and const_name in (
"BadPixelsPC", "SlopesPC", "BadPixelsFF", "SlopesFF"
):
param_copy = cond_param.copy()
del param_copy["gain_mode"]
condition = getattr(Conditions, cond_type).AGIPD(**param_copy)
else:
condition = getattr(Conditions, cond_type).AGIPD(**cond_param)
_, mdata = tools.get_from_db(
karabo_id,
k_da,
getattr(Constants.AGIPD, const_name)(),
condition,
getattr(np, const_init_fun)(const_shape),
cal_db_interface,
creation_time,
meta_only=True,
verbosity=0,
)
mdata_const = mdata.calibration_constant_version
# check if constant was successfully retrieved.
if mdata.comm_db_success:
const_mdata["file-path"] = (
f"{mdata_const.hdf5path}" f"{mdata_const.filename}"
)
const_mdata["creation-time"] = f"{mdata_const.begin_at}"
mdata_dict["physical-detector-unit"] = mdata_const.device_name
else:
const_mdata["file-path"] = const_dict[const_name][:2]
const_mdata["creation-time"] = None
return mdata_dict, k_da, local_acq_rate, local_mem_cells
```
%% Cell type:code id: tags:
``` python
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
pc_bools = [corr_bools.get("rel_gain"),
corr_bools.get("adjust_mg_baseline"),
corr_bools.get('blc_noise'),
corr_bools.get('blc_hmatch'),
corr_bools.get('blc_stripes'),
melt_snow]
inp = []
only_dark = False
nodb_with_dark = False
if not nodb:
only_dark = (calfile != "")
if calfile != "" and not corr_bools["only_offset"]:
nodb_with_dark = nodb
da_to_qm = dict()
for module_index, k_da in zip(modules, karabo_da):
da_to_qm[k_da] = tools.module_index_to_qm(module_index)
if k_da in retrieved_constants:
print(
f"Constant for {k_da} already in calibration_metadata.yml, won't query again.")
continue
inp.append((k_da, module_index))
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=nmods) as pool:
results = pool.starmap(retrieve_constants, inp)
```
%% Cell type:code id: tags:
``` python
acq_rate_mods = []
mem_cells_mods = []
for md_dict, k_da, acq_rate, mem_cells in results:
if acq_rate is None and mem_cells is None:
continue
retrieved_constants[k_da] = md_dict
mem_cells_mods.append(mem_cells)
acq_rate_mods.append(acq_rate)
# Validate that mem_cells and acq_rate are the same for all modules.
# TODO: Should a warning be enough?
if len(set(mem_cells_mods)) != 1 or len(set(acq_rate_mods)) != 1:
print(
"WARNING: Number of memory cells or "
"acquisition rate are not identical for all modules.\n"
f"mem_cells: {mem_cells_mods}.\nacq_rate: {acq_rate_mods}.")
# check if it is requested not to retrieve any constants from the database
if nodb_with_dark:
print("No constants were retrieved as calibrated files will be used.")
else:
print("\nRetrieved constants for modules:",
', '.join([tools.module_index_to_qm(x) for x in modules]))
print(f"Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Gain setting: {gain_setting}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: {photon_energy}")
print(f"• Photon Energy: 9.2")
print("Constant metadata is saved under \"retrieved-constants\" in calibration_metadata.yml\n")
```
%% Cell type:code id: tags:
``` python
print("Using constants with creation times:")
timestamps = {}
for k_da, module_name in da_to_qm.items():
if k_da not in retrieved_constants.keys():
continue
module_timestamps = timestamps[module_name] = {}
module_constants = retrieved_constants[k_da]
print(f"{module_name}:")
for cname, mdata in module_constants["constants"].items():
if hasattr(mdata["creation-time"], 'strftime'):
mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
print(f'{cname:.<12s}', mdata["creation-time"])
for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
if cname in module_constants["constants"]:
module_timestamps[cname] = module_constants["constants"][cname]["creation-time"]
else:
module_timestamps[cname] = "NA"
time_summary = retrieved_constants.setdefault("time-summary", {})
time_summary["SAll"] = timestamps
metadata.save()
```
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 91 # run to process, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID, set to empty string if it is the same a karabo-id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
relative_gain = True # do relative gain correction
limit_images = 0 # ONLY FOR TESTING. process only first N images, Use 0 to process all.
strixel_sensor = False # reordering for strixel detector layout.
strixel_double_norm = 2.0 # normalization to use for double-size pixels, only applied for strixel sensors.
limit_trains = 0 # ONLY FOR TESTING. process only first N trains, Use 0 to process all.
chunks_ids = 32 # HDF chunk size for memoryCell and frameNumber.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by value in file, if manual_slow_data is set to True.
mem_cells = -1 # Set mem_cells to -1 to automatically use the value stored in RAW data.
bias_voltage = 180 # will be overwritten by value in file
# Parameters for plotting
skip_plots = False # exit after writing corrected files
plot_trains = 500 # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
cell_id_preview = 15 # cell Id used for preview in single-shot plots
# Parameters for ROI selection and reduction
roi_definitions = [-1] # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
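%% Cell type:markdown id: tags:
The flat `roi_definitions` list is interpreted in groups of six values, as described in the parameter comment above. A small, hypothetical sketch of splitting such a list into per-ROI tuples (module, first slice start/stop, second slice start/stop, axis to average along):
%% Cell type:code id: tags:
``` python
# Sketch only: split the flat list into groups of six values per ROI.
example_rois = [3, 120, 180, 200, 550, -2]  # example taken from the comment above
grouped = [tuple(example_rois[i:i + 6]) for i in range(0, len(example_rois), 6)]
print(grouped)  # [(3, 120, 180, 200, 550, -2)]
```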
%% Cell type:code id: tags:
``` python
import multiprocessing
import sys
import warnings
from functools import partial
from logging import warning
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory, by_id, components
from extra_geom import JUNGFRAUGeometry
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.files import DataFile
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
map_seq_files,
write_compressed_frames,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
run_dc = RunDirectory(run_folder)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
out_folder.mkdir(parents=True, exist_ok=True)
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
f"Axis numbers given: {roi_definitions[5::6]}")
sys.exit(1)
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
run_folder, karabo_da, sequences)
if not len(mapped_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
for kda, sfiles in mapped_files.items():
for k, f in enumerate(sfiles):
if k == 0:
table.append((fi, kda, k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
if mem_cells < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
else:
memory_cells = mem_cells
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: "
f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})") # noqa
print(f"Gain mode is {gain_mode}")
print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:code id: tags:
``` python
strixel_transform = None
output_frame_shape = None
if strixel_sensor:
from cal_tools.jfalgs import strixel_transform, strixel_shape, strixel_double_pixels
output_frame_shape = strixel_shape()
Ydouble, Xdouble = strixel_double_pixels()
print('Strixel sensor transformation enabled')
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
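If the precorrection notebook has already run, the `retrieved-constants` section of `calibration_metadata.yml` tells this notebook which constant files to load. A minimal sketch of the per-module structure that `get_constants_for_module()` below expects, with made-up paths and timestamps, could look like this:
``` python
# Illustration only: shape of one retrieved-constants entry; all values are made up.
const_yaml_example = {
    "JNGFR01": {
        "constants": {
            "Offset": {
                "creation-time": "2022-03-01T12:00:00+00:00",   # hypothetical timestamp
                "file-path": "/gpfs/exfel/cal/example/cal.h5",  # hypothetical file
                "dataset-name": "/Jungfrau_M001/Offset/0",      # hypothetical dataset
            },
        },
    },
}
```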
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
empty_constants = {
"Offset": np.zeros((512, 1024, memory_cells, 3), dtype=np.float32),
"BadPixelsDark": np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
"RelativeGain": None,
"BadPixelsFF": None,
}
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def get_constants_for_module(karabo_da: str):
""" Get calibration constants for given module of Jungfrau
:return:
offset_map (offset map),
mask (mask of bad pixels),
gain_map (map of relative gain factors),
db_module (name of DB module),
when (dictionary: constant - creation time)
"""
when = dict()
const_data = dict()
if const_yaml:
for cname, mdata in const_yaml[karabo_da]["constants"].items():
const_data[cname] = dict()
when[cname] = mdata["creation-time"]
if when[cname]:
with h5py.File(mdata["file-path"], "r") as cf:
const_data[cname] = np.copy(
cf[f"{mdata['dataset-name']}/data"])
else:
const_data[cname] = empty_constants[cname]
else:
retrieval_function = partial(
get_constant_from_db_and_time,
karabo_id=karabo_id,
karabo_da=karabo_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
for cname, cempty in empty_constants.items():
const_data[cname], when[cname] = retrieval_function(
condition=condition,
constant=getattr(Constants.jungfrau, cname)(),
empty_constant=cempty,
)
offset_map = const_data["Offset"]
mask = const_data["BadPixelsDark"]
gain_map = const_data["RelativeGain"]
mask_ff = const_data["BadPixelsFF"]
# Combine masks
if mask_ff is not None:
mask |= np.moveaxis(mask_ff, 0, 1)
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
mask = np.moveaxis(mask, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
# masking double size pixels
mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE
if gain_map is not None:
if memory_cells > 1:
gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
# add extra empty cell constant
b = np.ones(((1,)+gain_map.shape[1:]))
gain_map = np.concatenate((gain_map, b), axis=0)
else:
gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)
return offset_map, mask, gain_map, karabo_da, when
with multiprocessing.Pool() as pool:
r = pool.map(get_constants_for_module, karabo_da)
# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
print(f'Constants for module {k_da}:')
for const in when:
print(f' {const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
relative_gain = False
constants[k_da] = (offset_map, mask, gain_map)
```
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
d = d.astype(np.float32) # [cells, x, y]
g = gain[index]
# Copy gain over first to keep it at the original 3 for low gain.
if strixel_transform is not None:
strixel_transform(g, out=gain_corr[index, ...])
else:
gain_corr[index, ...] = g
# Jungfrau gains 0[00], 1[01], 3[11]
# Change low gain to 2 for indexing purposes.
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though the memory cell pattern can be assumed to be the same
across all trains of one correction run (taken with one acquisition),
it is preferred not to rely on this, to account for exceptions that can happen.
"""
m = memcells[index].copy()
# 255 is a cell value pointing to no cell image data (image of 0 pixels).
# The corresponding image is corrected with the constants of cell 0, to avoid indexing with 255.
# This relies on the modified memory cell values not being stored in the corrected data.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
mask_cell = mask[m, ...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
d -= offset
# Gain correction
if relative_gain:
if memory_cells > 1:
gain_map_cell = gain_map[m, ...]
else:
gain_map_cell = gain_map
cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
d /= cal
msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))
if strixel_transform is not None:
strixel_transform(d, out=data_corr[index, ...])
data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
strixel_transform(msk, out=mask_corr[index, ...])
else:
data_corr[index, ...] = d
mask_corr[index, ...] = msk
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
"""If ROIs are defined for this karabo_da, reduce them and save to the output file"""
rois_defined = 0
module_no = int(karabo_da[-2:])
params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
rois_source = f'{params_source}:output'
for i in range(len(roi_definitions) // 6):
roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
if roi_module == module_no:
rois_defined += 1
# Apply the mask and average remaining pixels to 1D
roi_data = data_corr[..., a1:a2, b1:b2].mean(
axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(rois_source)
outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)
ctrl_source = ofile.create_control_source(params_source)
ctrl_source.create_run_key('region', np.array([[a1, a2, b1, b2]]))
ctrl_source.create_run_key('reduce_axis', np.array([mean_axis]))
if rois_defined:
# Copy the index for the new source
# Create count/first datasets at INDEX source.
ntrains = ofile['INDEX/trainId'].shape[0]
ctrl_source.create_index(ntrains)
```
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
# Loop over modules
empty_seq = 0
for local_karabo_da, mapped_files_module in mapped_files.items():
instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
data_path = "INSTRUMENT/"+instrument_src_kda+"/data"
for sequence_file in mapped_files_module: # noqa
sequence_file = Path(sequence_file)
seq_dc = H5File(sequence_file)
for sequence_file in mapped_files_module:
# Save corrected data in an output file with name
# of corresponding raw sequence file.
ofile_name = sequence_file.name.replace("RAW", "CORR")
out_file = out_folder / ofile_name
# Load sequence file data collection, data.adc keydata,
# the shape of the data (used later to create arrays of the same shape),
# and number of available trains to correct.
seq_dc = H5File(sequence_file)
seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
ishape = seq_dc_adc.shape # input shape.
corr_ntrains = ishape[0] # number of available trains to correct.
all_train_ids = seq_dc_adc.train_ids
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {sequence_file.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
"trains with missing data.")
# For testing, limit corrected trains. i.e. Getting output faster.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")
# Load constants from the constants dictionary.
# These arrays are used by `correct_train()` function
offset_map, mask, gain_map = constants[local_karabo_da]
# Determine total output shape.
if output_frame_shape is not None:
oshape = (*ishape[:-2], *output_frame_shape)
else:
oshape = ishape
# Allocate shared arrays for corrected data. Used in `correct_train()`
data_corr = context.alloc(shape=oshape, dtype=np.float32)
gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
mask_corr = context.alloc(shape=oshape, dtype=np.uint32)
step_timer.start()
# Overwrite seq_dc after eliminating empty trains or/and applying limited images.
seq_dc = seq_dc.select(
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:n_imgs])
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
# Load raw images(adc), gain, memcells, and frame numbers.
data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()
# Validate that the selected cell id to preview is available in raw data.
if memory_cells > 1:
# For plotting, assuming that memory cells are sorted the same for all trains.
found_cells = memcells[0] == cell_id_preview
if any(found_cells):
cell_idx_preview = np.where(found_cells)[0][0]
else:
print(f"WARNING: The selected cell_id_preview {cell_id_preview} is not available in burst mode."
f"Previewing cell `{memcells[0]}`.")
print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
f"Previewing cell `{memcells[0]}`.")
cell_idx_preview = 0
else:
cell_idx_preview = 0
# Correct data per train
context.map(correct_train, data)
step_timer.done_step(f"Correction time.")
step_timer.start()
# Create CORR files and add corrected data sections.
sel_trains = np.isin(all_train_ids, seq_dc.train_ids)
image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)
with DataFile(out_file, 'w') as outp_file:
# Create INDEX datasets.
outp_file.create_index(
train_ids=seq_dc.train_ids,
timestamps=seq_dc.files[0].file["INDEX/timestamp"][sel_trains],
flags=seq_dc.files[0].validity_flag[sel_trains])
# Create Instrument section to later add corrected datasets.
outp_source = outp_file.create_instrument_source(instrument_src_kda)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# RAW memoryCell and frameNumber are not corrected. But we are storing only
# the values for the corrected trains.
outp_source.create_key(
"data.memoryCell", data=memcells,
chunks=(min(chunks_ids, memcells.shape[0]), 1))
outp_source.create_key(
"data.frameNumber", data=frame_number,
chunks=(min(chunks_ids, frame_number.shape[0]), 1))
# Add main corrected `data.adc` dataset and store corrected data.
outp_source.create_key(
"data.adc", data=data_corr,
chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
write_compressed_frames(
gain_corr, outp_file, f"{outp_source.name}/data/gain", comp_threads=8)
write_compressed_frames(
mask_corr, outp_file, f"{outp_source.name}/data/mask", comp_threads=8)
save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)
# Create METADATA datasets
outp_file.create_metadata(like=seq_dc)
step_timer.done_step(f'Saving data time.')
if empty_seq == sum([len(i) for i in mapped_files.values()]):
raise ValueError("No valid trains for RAW data to correct.")
```
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
# Positions are given in pixels
mod_width = (256 * 4) + (2 * 3) # inc. 2px gaps between tiles
mod_height = (256 * 2) + 2
if karabo_id == "SPB_IRDA_JF4M":
# The first 4 modules are rotated 180 degrees relative to the others.
# We pass the bottom, beam-right corner of the module regardless of its
# orientation, requiring a subtraction from the symmetric positions we'd
# otherwise calculate.
x_start, y_start = 1125, 1078
module_pos = [
(x_start - mod_width, y_start - mod_height - (i * (mod_height + 33)))
for i in range(4)
] + [
(-x_start, -y_start + (i * (mod_height + 33))) for i in range(4)
]
orientations = [(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif karabo_id == "FXE_XAD_JF1M":
module_pos = ((-mod_width//2, 33),(-mod_width//2, -mod_height -33))
orientations = [(-1,-1), (1,1)]
else:
module_pos = ((-mod_width//2,-mod_height//2),)
orientations = None
geom = JUNGFRAUGeometry.from_module_positions(module_pos, orientations=orientations, asic_gap=0)
```
%% Cell type:code id: tags:
``` python
first_seq = 0 if sequences == [-1] else sequences[0]
with RunDirectory(out_folder, f"*{run}*S{first_seq:05d}*") as corr_dc:
# Reading CORR data for plotting.
jf_corr = components.JUNGFRAU(
corr_dc,
detector_name=karabo_id,
).select_trains(np.s_[:plot_trains])
tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))
# Shape = [modules, trains, cells, x, y]
# TODO: Fix the case if not all modules were requested to be corrected.
# For example, if only one module was corrected, an assertion error is expected
# at `geom.plot_data_fast`, while plotting corrected images.
corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...].values
corrected_train = jf_corr_data["data.adc"][
:, cell_idx_preview, ...
].values # lose the train axis.
mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...].values
mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...].values
with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*") as raw_dc:
# Reading RAW data for plotting.
jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(
np.s_[:plot_trains]
)
raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...].values
raw_train = (
jf_raw.select_trains(by_id[[tid]])
.get_array("data.adc")[:, 0, cell_idx_preview, ...]
.values
)
gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...].values
gain_train_cells = (
jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...].values
)
```
%% Cell type:code id: tags:
``` python
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time,
)
```
%% Cell type:markdown id: tags:
### Mean RAW Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
raw_mean = np.mean(raw, axis=1)
geom.plot_data_fast(
raw_mean,
ax=ax,
vmin=min(0.75*np.median(raw_mean[raw_mean > 0]), 2000),
vmax=max(1.5*np.median(raw_mean[raw_mean > 0]), 16000),
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
ax.set_title(f'{karabo_id} - Mean RAW', size=18)
plt.show()
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
corrected_mean = np.mean(corrected, axis=1)
_corrected_vmin = min(0.75*np.median(corrected_mean[corrected_mean > 0]), -0.5)
_corrected_vmax = max(2.*np.median(corrected_mean[corrected_mean > 0]), 100)
mean_plot_kwargs = dict(
vmin=_corrected_vmin, vmax=_corrected_vmax, cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(figsize=(18, 10))
corrected_masked = corrected.copy()
corrected_masked[mask != 0] = np.nan
corrected_masked_mean = np.nanmean(corrected_masked, axis=1)
del corrected_masked
if not strixel_sensor:
geom.plot_data_fast(
corrected_masked_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_masked_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### A single image from train {tid}")))
fig, ax = plt.subplots(figsize=(18, 10))
single_plot_kwargs = dict(
vmin=min(0.75 * np.median(corrected_train[corrected_train > 0]), -0.5),
vmax=max(2.0 * np.median(corrected_train[corrected_train > 0]), 100),
cmap="jet",
colorbar={"shrink": 1, "pad": 0.01},
cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_train,
ax=ax,
colorbar={"shrink": 1, "pad": 0.01},
**single_plot_kwargs
)
else:
ax.imshow(corrected_train.squeeze(), aspect=10, **single_plot_kwargs)
ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [
np.min(edges[1]),
np.max(edges[1]),
np.min(edges[0]),
np.max(edges[0]),
]
im = ax.imshow(
data[::-1, :],
extent=extent,
aspect="auto",
norm=LogNorm(vmin=1, vmax=np.max(data))
)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:markdown id: tags:
### Gain Bit Value
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
h, ex, ey = np.histogram2d(
raw[i].flatten(),
gain[i].flatten(),
bins=[100, 4],
range=[[0, 10000], [0, 4]],
)
do_2d_plot(
h,
(ex, ey),
"Signal (ADU)",
"Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
f"Module {mod} ({pdu})",
)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
corrected_flatten = corrected[i].flatten()
for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):
h = ax.hist(
corrected_flatten,
bins=1000,
range=hist_range,
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))
fig, ax = plt.subplots(figsize=(18, 10))
gain_max = np.max(gain_train_cells, axis=(1, 2))
geom.plot_data_fast(
gain_max,
ax=ax,
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
plt.show()
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append(
(item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
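As a minimal sketch (illustration only, assuming the `mask_train` array loaded in the preview cells above), individual flags can be tested with a bitwise AND against the `BadPixels` values:
``` python
# Illustration only: decode 32-bit mask entries with bitwise AND.
noisy = (mask_train & BadPixels.NOISE_OUT_OF_THRESHOLD.value) != 0
any_bad = mask_train != 0
print(f"{noisy.sum()} pixels flagged as noisy, {any_bad.sum()} flagged in total")
```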
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
display(Markdown(f"#### Bad pixels image for train {tid}"))
fig, ax = plt.subplots(figsize=(18, 10))
if not strixel_sensor:
geom.plot_data_fast(
np.log2(mask_train),
ax=ax,
vmin=0, vmax=32, cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
else:
ax.imshow(np.log2(mask_train).squeeze(), vmin=0, vmax=32, cmap='jet', aspect=10)
plt.show()
```
......
%% Cell type:markdown id: tags:
# Jungfrau Dark Image Characterization #
Author: European XFEL Detector Group, Version: 2.0
Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/' # folder under which runs are located, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # path to place reports at, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
run_high = 141 # run number for G0 dark run, required
run_med = 142 # run number for G1 dark run, required
run_low = 143 # run number for G2 dark run, required
# Parameters used to access raw data.
karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules
karabo_id = 'SPB_IRDA_JF4M' # karabo_id (detector identifier) prefix of Jungfrau detector to process.
karabo_id_control = '' # karabo_id of the control device, if it is different; leave empty if it is the same as karabo_id
receiver_template = 'JNGFR{:02}' # inset for receiver devices
instrument_source_template = '{}/DET/{}:daqOutput' # template for instrument source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
# Parameters for calibration database and storing constants.
use_dir_creation_date = True # use dir creation date
cal_db_interface = 'tcp://max-exfl016:8016#8045' # calibrate db interface to connect to
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
# Parameters affecting creating dark calibration constants.
badpixel_threshold_sigma = 5. # bad pixels defined by values outside n times this std from median
offset_abs_threshold_low = [1000, 10000, 10000] # absolute bad pixel threshold in terms of offset, lower values
offset_abs_threshold_high = [8000, 15000, 15000] # absolute bad pixel threshold in terms of offset, upper values
max_trains = 0 # Maximum trains to process darks. Set to 0 to process all available train images.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
time_limits = 0.025 # to find calibration constants later on, the integration time is allowed to vary by this amount (us)
# Parameters to be used for injecting dark calibration constants.
integration_time = 1000 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixgain1, fixgain2. Will be overwritten by value in file
gain_mode = 0 # 1 if medium and low runs are fixgain1 and fixgain2, otherwise 0. It will be overwritten by value in file, if manual_slow_data
bias_voltage = 90 # sensor bias voltage in V, will be overwritten by value in file
memory_cells = 16 # number of memory cells
# Parameters used for plotting
detailed_report = False
# TODO: this is only used for a warning check in the AGIPD dark notebooks.
# Need to rethink whether it makes sense to use it here as well.
operation_mode = 'ADAPTIVE_GAIN' # Detector operation mode, optional
```
%% Cell type:code id: tags:
``` python
import glob
import os
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import pasha as psh
import yaml
from IPython.display import Markdown, display
from extra_data import RunDirectory
matplotlib.use('agg')
%matplotlib inline
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.histogram import histPlot
from cal_tools import jungfraulib, step_timing
from cal_tools.ana_tools import save_dict_to_hdf5
from cal_tools.enums import BadPixels, JungfrauGainMode
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
# Constants relevant for the analysis
run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
sensor_size = (1024, 512)
gains = [0, 1, 2]
fixed_settings = [
JungfrauGainMode.FIX_GAIN_1.value, JungfrauGainMode.FIX_GAIN_2.value]
dynamic_settings = [
JungfrauGainMode.FORCE_SWITCH_HG1.value, JungfrauGainMode.FORCE_SWITCH_HG2.value]
old_fixed_settings = ["fixgain1", "fixgain2"]
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time")
os.makedirs(out_folder, exist_ok=True)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = f"proposal:{proposal} runs:{run_high} {run_med} {run_low}"
report = get_report(metadata_folder)
step_timer = step_timing.StepTimer()
```
%% Cell type:markdown id: tags:
## Reading control data
%% Cell type:code id: tags:
``` python
step_timer.start()
gain_runs = dict()
med_low_settings = []
ctrl_src = ctrl_source_template.format(karabo_id_control)
for gain, run_n in enumerate(run_nums):
run_dc = RunDirectory(f"{in_folder}/r{run_n:04d}/")
gain_runs[run_n] = [gain, run_dc]
ctrl_data = jungfraulib.JungfrauCtrl(run_dc, ctrl_src)
# Read control data for the high gain run only.
if run_n == run_high:
run_mcells, sc_start = ctrl_data.get_memory_cells()
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
print(f"Gain setting is {gain_setting} ({run_settings})")
print(f"Gain setting is {gain_setting} ({ctrl_data.run_settings})")
print(f"Integration time is {integration_time} us")
print(f"Bias voltage is {bias_voltage} V")
if run_mcells == 1:
memory_cells = 1
print('Dark runs in single cell mode, '
f'storage cell start: {sc_start:02d}')
else:
memory_cells = 16
print('Dark runs in burst mode, '
f'storage cell start: {sc_start:02d}')
else:
gain_mode = ctrl_data.get_gain_mode()
med_low_settings.append(ctrl_data.run_mode)
# A transparent workaround for old raw data with wrong/missing medium and low settings
if med_low_settings == [None, None]:
print("WARNING: run.settings is not stored in the data to read. "
f"Hence assuming gain_mode = {gain_mode} for adaptive old data.")
elif med_low_settings == ["dynamicgain", "forceswitchg1"]:
print(f"WARNING: run.settings for medium and low gain runs are wrong {med_low_settings}. "
f"This is an expected bug for old raw data. Setting gain_mode to {gain_mode}.")
# Validate that low_med_settings is not a mix of adaptive and fixed settings.
elif not (sorted(med_low_settings) in [fixed_settings, dynamic_settings, old_fixed_settings]): # noqa
raise ValueError(
"Medium and low run settings are not as expected. "
f"Either {dynamic_settings} or {fixed_settings} are expected.\n"
f"Either {dynamic_settings}, {fixed_settings}, or {old_fixed_settings} are expected.\n"
f"Got {sorted(med_low_settings)} for both runs, respectively.")
print(f"Gain mode is {gain_mode} ({med_low_settings})")
step_timer.done_step(f'Reading control data.')
```
%% Cell type:code id: tags:
``` python
# set the operating condition
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
mod_x_const = [(mod, const) for const in ["Offset", "Noise", "BadPixelsDark"] for mod in karabo_da]
from cal_tools.tools import get_from_db
from datetime import timedelta
def retrieve_old_constant(mod, const):
dconst = getattr(Constants.jungfrau, const)()
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=mod,
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=60) if creation_time else None,
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, mod_x_const
)
old_retrieval_pool.close()
```
%% Cell type:code id: tags:
``` python
# Use only high gain threshold for all gains in case of fixed_gain.
if gain_mode: # fixed_gain
offset_abs_threshold = [[offset_abs_threshold_low[0]]*3, [offset_abs_threshold_high[0]]*3]
else:
offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]
```
%% Cell type:code id: tags:
``` python
context = psh.context.ThreadContext(num_workers=multiprocessing.cpu_count())
```
%% Cell type:code id: tags:
``` python
"""
All jungfrau runs are taken through one acquisition, except for the forceswitch runs.
While taking non-fixed dark runs, a procedure of multiple acquisitions is used to switch the storage cell indices.
This is done for medium and low gain dark dynamic runs, only [forceswitchg1, forceswitchg2]:
Switching the cell indices in burst mode is a work around for hardware procedure
deficiency that produces wrong data for dark runs except for the first storage cell.
This is why multiple acquisitions are taken to switch the used storage cells and
acquire data through two cells for each of the 16 cells instead of acquiring darks through all 16 cells.
"""
print(f"Maximum trains to process is set to {max_trains}")
noise_map = dict()
offset_map = dict()
bad_pixels_map = dict()
for mod in karabo_da:
step_timer.start()
instrument_src = instrument_source_template.format(
karabo_id, receiver_template.format(int(mod[-2:])))
print(f"\n- Instrument data path for {mod} is {instrument_src}.")
offset_map[mod] = context.alloc(shape=(sensor_size+(memory_cells, 3)), fill=0)
noise_map[mod] = context.alloc(like=offset_map[mod], fill=0)
bad_pixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32, fill=0)
for run_n, [gain, run_dc] in gain_runs.items():
def process_cell(worker_id, array_index, cell_number):
cell_slice_idx = acelltable == cell_number
thiscell = images[..., cell_slice_idx]
# Identify cells/trains with images of 0 pixels.
# TODO: An investigation is ongoing by DET to identify reason for these empty images.
nonzero_adc = np.any(thiscell != 0 , axis=(0, 1))
# Exclude empty images with 0 pixels, before calculating offset and noise
thiscell = thiscell[..., nonzero_adc]
offset_map[mod][..., cell_number, gain] = np.mean(thiscell, axis=2)
noise_map[mod][..., cell_number, gain] = np.std(thiscell, axis=2)
# 1. Exclude empty images.
# 2. Indicate pixels with wrong gain value for any train for each cell.
# TODO: mean is used to use thresholds for accepting gain values, even if not 0 mean value.
gain_avg = np.mean(
gain_vals[..., cell_slice_idx][..., nonzero_adc], axis=2)
bad_pixels_map[mod][..., cell_number, gain][gain_avg != raw_g] |= BadPixels.WRONG_GAIN_VALUE.value
print(f"Gain stage {gain}, run {run_n}")
# load shape of data for memory cells, and detector size (imgs, cells, x, y)
n_imgs = run_dc[instrument_src, "data.adc"].shape[0]
# load number of data available, including trains with empty data.
n_trains = len(run_dc.train_ids)
instr_dc = run_dc.select(instrument_src, require_all=True)
empty_trains = n_trains - n_imgs
if empty_trains != 0:
print(f"\tWARNING: {mod} has {empty_trains} trains with empty data out of {n_trains} trains") # noqa
if max_trains > 0:
n_imgs = min(n_imgs, max_trains)
print(f"Processing {n_imgs} images.")
# Select only requested number of images to process darks.
instr_dc = instr_dc.select_trains(np.s_[:n_imgs])
if n_imgs < min_trains:
raise ValueError(
f"Less than {min_trains} trains are available in RAW data."
" Not enough data to process darks.")
images = np.transpose(
instr_dc[instrument_src, "data.adc"].ndarray(), (3, 2, 1, 0))
acelltable = np.transpose(instr_dc[instrument_src, "data.memoryCell"].ndarray())
gain_vals = np.transpose(
instr_dc[instrument_src, "data.gain"].ndarray(), (3, 2, 1, 0))
# define gain value as saved in raw gain map
raw_g = 3 if gain == 2 else gain
if memory_cells == 1:
acelltable -= sc_start
# Only for dynamic medium and low gain runs [forceswitchg1, forceswitchg2] in burst mode.
if gain_mode == 0 and gain > 0 and memory_cells == 16:
# Set entries for all but the first storage cell to 255, mirroring the receiver,
# which uses the value 255 to indicate a cell without an image.
# The image shape for forceswitchg1 and forceswitchg2 runs is (1024, 512, 2, trains)
# compared to the expected (1024, 512, 16, trains) for the high gain run.
acelltable[1:] = 255
# Calculate offset and noise maps
context.map(process_cell, range(memory_cells))
step_timer.done_step(f'Creating Offset and noise constants for a module.')
```
%% Cell type:code id: tags:
``` python
if detailed_report:
display(Markdown("## Offset and Noise Maps:"))
display(Markdown(
"Below offset and noise maps for the high ($g_0$) gain stage are shown, "
"alongside the distribution of these values. One expects block-like "
"structures mapping to the ASICs of the detector"))
g_name = ['G0', 'G1', 'G2']
g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
n_range = [(0., 50.), (0., 50.), (0., 50.)]
unit = '[ADCu]'
# TODO: Fix plots arrangment and speed for Jungfrau burst mode.
step_timer.start()
for pdu, mod in zip(db_modules, karabo_da):
for g_idx in gains:
for cell in range(0, memory_cells):
f_o0 = heatmapPlot(
np.swapaxes(offset_map[mod][..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label=unit,
aspect=1.,
vmin=g_range[g_idx][0],
vmax=g_range[g_idx][1],
title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})')
fo0, ax_o0 = plt.subplots()
res_o0 = histPlot(
ax_o0, offset_map[mod][..., cell, g_idx],
bins=800,
range=g_range[g_idx],
facecolor='b',
histotype='stepfilled',
)
ax_o0.tick_params(axis='both',which='major',labelsize=15)
ax_o0.set_title(
f'Module pedestal distribution - Cell {cell:02d} - Module {mod} ({pdu})',
fontsize=15)
ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
ax_o0.set_yscale('log')
f_n0 = heatmapPlot(
np.swapaxes(noise_map[mod][..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label= unit,
aspect=1.,
vmin=n_range[g_idx][0],
vmax=n_range[g_idx][1],
title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})",
)
fn0, ax_n0 = plt.subplots()
res_n0 = histPlot(
ax_n0,
noise_map[mod][..., cell, g_idx],
bins=100,
range=n_range[g_idx],
facecolor='b',
histotype='stepfilled',
)
ax_n0.tick_params(axis='both', which='major', labelsize=15)
ax_n0.set_title(
f'Module noise distribution - Cell {cell:02d} - Module {mod} ({pdu})',
fontsize=15)
ax_n0.set_xlabel(
f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
plt.show()
step_timer.done_step(f'Plotting offset and noise maps.')
```
%% Cell type:markdown id: tags:
## Bad Pixel Map ##
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:
$$
v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
$$
or
$$
v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
$$
Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
print("{:<30s} {:032b} -> {}".format(bp.name, bp.value, int(bp.value)))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
print_bp_entry(BadPixels.WRONG_GAIN_VALUE)
def eval_bpidx(d):
mdn = np.nanmedian(d, axis=(0, 1))[None, None, :, :]
std = np.nanstd(d, axis=(0, 1))[None, None, :, :]
idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)
return idx
```
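As a quick sanity check (synthetic data, illustration only), the median ± n·σ cut implemented by `eval_bpidx()` above should flag a planted outlier and nothing else:
``` python
# Illustration only: eval_bpidx() on synthetic (x, y, cell, gain) shaped data.
rng = np.random.default_rng(seed=0)
demo = rng.normal(1000., 10., size=(64, 64, 1, 3))  # well-behaved pixels
demo[0, 0, 0, 0] = 5000.                             # one clear outlier
print(np.argwhere(eval_bpidx(demo)))                 # expected: [[0 0 0 0]]
```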
%% Cell type:code id: tags:
``` python
step_timer.start()
for pdu, mod in zip(db_modules, karabo_da):
display(Markdown(f"### Badpixels for module {mod} ({pdu}):"))
offset_abs_threshold = np.array(offset_abs_threshold)
bad_pixels_map[mod][eval_bpidx(offset_map[mod])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bad_pixels_map[mod][~np.isfinite(offset_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
bad_pixels_map[mod][eval_bpidx(noise_map[mod])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bad_pixels_map[mod][~np.isfinite(noise_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
bad_pixels_map[mod][(offset_map[mod] < offset_abs_threshold[0][None, None, None, :]) | (offset_map[mod] > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value # noqa
if detailed_report:
for g_idx in gains:
for cell in range(memory_cells):
bad_pixels = bad_pixels_map[mod][:, :, cell, g_idx]
fn_0 = heatmapPlot(
np.swapaxes(bad_pixels, 0, 1),
y_label="Row",
x_label="Column",
lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
aspect=1.,
vmin=0, vmax=5,
title=f'G{g_idx} Bad pixel map - Cell {cell:02d} - Module {mod} ({pdu})')
step_timer.done_step(f'Creating bad pixels constant')
```
%% Cell type:markdown id: tags:
## Inject and save calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
for mod, db_mod in zip(karabo_da, db_modules):
constants = {
'Offset': np.moveaxis(offset_map[mod], 0, 1),
'Noise': np.moveaxis(noise_map[mod], 0, 1),
'BadPixelsDark': np.moveaxis(bad_pixels_map[mod], 0, 1),
}
md = None
for key, const_data in constants.items():
const = getattr(Constants.jungfrau, key)()
const.data = const_data
for parm in condition.parameters:
if parm.name == "Integration Time":
parm.lower_deviation = time_limits
parm.upper_deviation = time_limits
if db_output:
md = send_to_db(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
)
if local_output:
md = save_const_to_h5(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder,
)
print(f"Calibration constant {key} is stored locally at {out_folder}.\n")
print("Constants parameter conditions are:\n")
print(
f"• Bias voltage: {bias_voltage}\n"
f"• Memory cells: {memory_cells}\n"
f"• Integration time: {integration_time}\n"
f"• Gain setting: {gain_setting}\n"
f"• Gain mode: {gain_mode}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n") # noqa
step_timer.done_step("Injecting constants.")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (mod, const), (data, timestamp, filepath, h5path) in zip(
mod_x_const, old_retrieval_res.get()):
old_const.setdefault(mod, {})[const] = data
old_mdata.setdefault(mod, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path,
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("## The following pre-existing constants are used for comparison:"))
for mod, consts in old_mdata.items():
pdu = db_modules[karabo_da.index(mod)]
display(Markdown(f"- {mod} ({pdu})"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{metadata_folder or out_folder}/module_metadata_{mod}.yml", "w") as fd:
yaml.safe_dump(
{
"module": mod,
"pdu": pdu,
"old-constants": old_mdata[mod],
},
fd,
)
```
......
%% Cell type:markdown id: tags:
# Jungfrau Dark Summary
Author: European XFEL Detector Department, Version: 1.0
Summary of the processed dark constants and a comparison with previously injected constants with the same conditions.
%% Cell type:code id: tags:
``` python
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/jungfrau_assembeled_dark" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate.
# Parameters used to access raw data.
karabo_da = [] # list of data aggregators, which corresponds to different JF modules. This is only needed for single-module detectors.
karabo_id = "FXE_XAD_JF1M" # detector identifier.
# Parameters to be used for injecting dark calibration constants.
local_output = True # Boolean indicating that local constants were stored in the out_folder
# Skip the whole notebook if local_output is false in the preceding notebooks.
if not local_output:
print('No local constants saved. Skipping summary plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
import warnings
from collections import OrderedDict
from pathlib import Path
warnings.filterwarnings('ignore')
import h5py
import matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import yaml
from IPython.display import Markdown, display
matplotlib.use("agg")
%matplotlib inline
from cal_tools.enums import BadPixels, JungfrauSettings
from cal_tools.ana_tools import get_range
from cal_tools.plotting import init_jungfrau_geom, show_processed_modules_jungfrau
from cal_tools.tools import CalibrationMetadata
from XFELDetAna.plotting.simpleplot import simplePlot
```
%% Cell type:code id: tags:
``` python
# Prepare paths and load previous constants' metadata.
out_folder = Path(out_folder)
metadata = CalibrationMetadata(metadata_folder or out_folder)
mod_mapping = metadata.setdefault("modules-mapping", {})
dark_constants = ["Offset", "Noise", "BadPixelsDark"]
prev_const_metadata = {}
for fn in Path(metadata_folder or out_folder).glob("module_metadata_*.yml"):
with fn.open("r") as fd:
fdict = yaml.safe_load(fd)
module = fdict["module"]
mod_mapping[module] = fdict["pdu"]
prev_const_metadata[module] = fdict["old-constants"]
metadata.save()
```
%% Cell type:code id: tags:
``` python
expected_modules, geom = init_jungfrau_geom(
karabo_id=karabo_id, karabo_da=karabo_da)
nmods = len(expected_modules)
```
%% Cell type:markdown id: tags:
Preparing the newly created constants and the previous constants from the local folder produced in out_folder.
%% Cell type:code id: tags:
``` python
fixed_gain = False # constant is adaptive by default.
# Get the constant shape from one of the local constants.
# This is one way to realize the number of memory cells.
with h5py.File(list(out_folder.glob("const_Offset_*"))[0], 'r') as f:
const_shape = f["data"][()].shape
# Get fixed gain value to decide offset vmin, vmax
# for later constant map plots.
gain_mode = "condition/Gain mode/value"
if gain_mode in f:
fixed_gain = f[gain_mode][()]
initial_stacked_constants = np.full(((nmods,)+const_shape), np.nan)
curr_constants = { c: initial_stacked_constants.copy() for c in dark_constants}
prev_constants = { c: initial_stacked_constants.copy() for c in dark_constants}
exculded_constants = [] # constants excluded from comparison plots.
# Loop over modules
for cname in dark_constants:
excluded_modules = [] # modules with no previous constants.
for i, mod in enumerate(sorted(expected_modules)):
# Loop over expected dark constants in out_folder.
# Some constants can be missing in out_folder.
pdu = mod_mapping[mod]
# first load new constant
fpath = out_folder / f"const_{cname}_{pdu}.h5"
with h5py.File(fpath, 'r') as f:
curr_constants[cname][i, ...] = f["data"][()]
# Load previous constants.
old_mod_mdata = prev_const_metadata[mod]
if cname in old_mod_mdata: # a module can be missing from detector dark processing.
filepath = old_mod_mdata[cname]["filepath"]
h5path = old_mod_mdata[cname]["h5path"]
if not filepath or not h5path:
excluded_modules.append(mod)
prev_constants[cname][i, ...].fill(np.nan)
else:
with h5py.File(filepath, "r") as fd:
prev_constants[cname][i, ...] = fd[f"{h5path}/data"][()]
if excluded_modules:
print(f"Previous {cname} constants for {excluded_modules} are not available.\n.")
# Exclude constants from comparison plots, if the corresponding
# previous constants are not available for all modules.
if len(excluded_modules) == nmods:
exculded_constants.append(cname)
print(f"No comparison plots for {cname}.\n")
```
%% Cell type:code id: tags:
``` python
display(Markdown('## Processed modules'))
processed_modules = list(mod_mapping.keys())
processed_pdus = list(mod_mapping.values())
show_processed_modules_jungfrau(
jungfrau_geom=geom,
constants=curr_constants,
processed_modules=processed_modules,
expected_modules=expected_modules,
display_module_names=processed_pdus,
)
```
%% Cell type:code id: tags:
``` python
gainstages = 3
gain_names = ["High Gain", "Medium Gain", "Low Gain" ]
const_range = {
"Offset": [(0, 8000), (8000, 16000), (8000, 16000)],
"Noise": [(0., 50.), (0., 50.), (0., 50.)],
"BadPixelsDark": [(0., 5.), (0., 5.), (0., 5.)],
}
# vmin and vmax are different for Offset for fixed gain constants.
if fixed_gain:
const_range["Offset"] = [(0, 8000), (0, 8000), (0, 8000)]
diff_const_range = {
"Offset": [(0, 500), (0, 500), (0, 500)],
"Noise": [(0., 5.), (0., 5.), (0., 5.)],
"BadPixelsDark": [(0., 5.), (0., 5.), (0., 5.)],
}
percentage_range = (0, 100)
perc_const_range = {c: [percentage_range]*3 for c in dark_constants}
gs = gridspec.GridSpec(2, 4)
axes = {
"ax0": {
"gs": gs[0, 1:3],
"shrink": 0.7,
"pad": 0.05,
"label": "ADCu",
"title": "{}",
"location": "right",
"range": const_range,
},
"ax1": {
"gs": gs[1, :2],
"shrink": 0.7,
"pad": 0.02,
"label": "ADCu",
"location": "left",
"title": "Difference with previous {}",
"range": diff_const_range,
},
"ax2": {
"gs": gs[1, 2:],
"shrink": 0.7,
"pad": 0.02,
"label": "%",
"location": "right",
"title": "Difference with previous {} %",
"range": perc_const_range,
},
}
```
%% Cell type:markdown id: tags:
## Summary figures across pixels and memory cells.
The following plots give an overview of calibration constants averaged across pixels and memory cells. A bad pixel mask is applied.
%% Cell type:code id: tags:
``` python
for cname, const in curr_constants.items():
# Prepare the stacked mean of constant,
# the difference with the previous constant
# and the fraction of that difference.
mean_const = np.nanmean(const, axis=3)
mean_diff = np.abs(np.nanmean(const, axis=3) - np.nanmean(prev_constants[cname], axis=3)) # noqa
mean_frac = np.abs(mean_diff / mean_const) * 100
for gain in range(gainstages):
data_to_plot = {
f'ax0': mean_const[..., gain],
f'ax1': mean_diff[..., gain],
f'ax2': mean_frac[..., gain],
}
# Plotting constant overall modules.
display(Markdown(f'### {cname} - {gain_names[gain]}'))
if nmods > 1:
fig = plt.figure(figsize=(20, 20))
else:
fig = plt.figure(figsize=(20, 10))
for axname, axv in axes.items():
# Avoid difference plots if previous constants
# are missing for the detector.
if cname in exculded_constants and axname != "ax0":
break
ax = fig.add_subplot(axv["gs"])
vmin, vmax = axv["range"][cname][gain]
geom.plot_data(
data_to_plot[axname],
vmin=vmin, vmax=vmax, ax=ax,
colorbar={
"shrink": axv["shrink"],
"pad": axv["pad"],
"location": axv["location"],
}
)
colorbar = ax.images[0].colorbar
colorbar.set_label(axv["label"], fontsize=15)
colorbar.ax.tick_params(labelsize=15)
ax.tick_params(labelsize=1)
ax.set_title(axv["title"].format(
f"{cname} {gain_names[gain]}"), fontsize=15)
if axname == "ax0":
ax.set_xlabel('Columns', fontsize=15)
ax.set_ylabel('Rows', fontsize=15)
ax.tick_params(labelsize=15)
else:
ax.tick_params(labelsize=0)
# Remove axes labels for comparison plots.
ax.set_xlabel('')
ax.set_ylabel('')
plt.show()
```
%% Cell type:code id: tags:
``` python
if curr_constants["Offset"].shape[-2] > 1:
display(Markdown("## Summary across pixels per memory cells"))
# Plot mean and std of memcells for each module, gain, and constant
# across trains.
for const_name, const in curr_constants.items():
display(Markdown(f'### {const_name}'))
for gain in range(gainstages):
data = np.copy(const[..., gain])
if const_name == 'BadPixelsDark':
data[data > 0] = 1.0
datamean = np.nanmean(data, axis=(1, 2))
datamean[datamean == 1.0] = np.nan
fig = plt.figure(
figsize=(15, 6),
tight_layout={'pad': 0.2, 'w_pad': 1.3, 'h_pad': 1.3})
label = 'Fraction of bad pixels'
ax = fig.add_subplot(1, 1, 1)
else:
datamean = np.nanmean(data, axis=(1, 2))
fig = plt.figure(
figsize=(15, 6),
tight_layout={'pad': 0.2, 'w_pad': 1.3, 'h_pad': 1.3})
label = f'{const_name} value [ADU], good pixels only'
ax = fig.add_subplot(1, 2, 1)
d = []
for i, mod in enumerate(datamean):
d.append({
'x': np.arange(mod.shape[0]),
'y': mod,
'drawstyle': 'steps-pre',
'label': processed_modules[i],
})
simplePlot(
d, figsize=(10, 10), xrange=(-12, 510),
x_label='Memory Cell ID',
y_label=label,
use_axis=ax,
title=f'{gain_names[gain]}',
title_position=[0.5, 1.18],
legend='outside-top-ncol6-frame',
legend_size='18%',
legend_pad=0.00,
)
# Extra Sigma plot for Offset and Noise constants.
if const_name != 'BadPixelsDark':
ax = fig.add_subplot(1, 2, 2)
label = f'$\sigma$ {const_name} [ADU], good pixels only'
d = []
for i, mod in enumerate(np.nanstd(data, axis=(1, 2))):
d.append({
'x': np.arange(mod.shape[0]),
'y': mod,
'drawstyle': 'steps-pre',
'label': processed_modules[i],
})
simplePlot(
d, figsize=(10, 10), xrange=(-12, 510),
x_label='Memory Cell ID',
y_label=label,
use_axis=ax,
title=f'{gain_names[gain]} $\sigma$',
title_position=[0.5, 1.18],
legend='outside-top-ncol6-frame',
legend_size='18%',
legend_pad=0.00,
)
plt.show()
```
%% Cell type:markdown id: tags:
# JUNGFRAU Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the JUNGFRAU Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 112 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID, leave empty if it is the same as karabo_id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on cal db requests
# Parameters affecting corrected data.
relative_gain = True # do relative gain correction
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixed gain. It will be overwritten by the value in the file if manual_slow_data is set to True.
mem_cells = -1 # Set mem_cells to -1 to automatically use the value stored in RAW data.
bias_voltage = 180 # will be overwritten by value in file
```
%% Cell type:code id: tags:
``` python
import datetime
from functools import partial
import multiprocessing
from extra_data import RunDirectory
from pathlib import Path
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
metadata = CalibrationMetadata(metadata_folder or out_folder)
run_folder = in_folder / f'r{run:04d}'
run_dc = RunDirectory(run_folder)
out_folder.mkdir(parents=True, exist_ok=True)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
if mem_cells < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
else:
memory_cells = mem_cells
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: "
f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})") # noqa
print(f"Gain mode is {gain_mode}")
print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings}")
print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
def get_constants_for_module(mod: str):
"""Get calibration constants for given module for Jungfrau."""
retrieval_function = partial(
get_from_db,
karabo_id=karabo_id,
karabo_da=mod,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
verbosity=0,
meta_only=True,
load_data=False,
empty_constant=None
)
mdata_dict = dict()
mdata_dict["constants"] = dict()
constants = [
"Offset", "BadPixelsDark",
"RelativeGain", "BadPixelsFF",
]
for cname in constants:
mdata_dict["constants"][cname] = dict()
if not relative_gain and cname in ["BadPixelsFF", "RelativeGain"]:
continue
_, mdata = retrieval_function(
condition=condition,
constant=getattr(Constants.jungfrau, cname)(),
)
mdata_const = mdata.calibration_constant_version
const_mdata = mdata_dict["constants"][cname]
# check if constant was successfully retrieved.
if mdata.comm_db_success:
const_mdata["file-path"] = (
f"{mdata_const.hdf5path}" f"{mdata_const.filename}"
)
const_mdata["dataset-name"] = mdata_const.h5path
const_mdata["creation-time"] = f"{mdata_const.begin_at}"
mdata_dict["physical-detector-unit"] = mdata_const.device_name
else:
const_mdata["file-path"] = None
const_mdata["creation-time"] = None
return mdata_dict, mod
```
%% Cell type:code id: tags:
``` python
# Constant paths are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
# Avoid retrieving constants for available modules in calibration_metadata.yml
# This is used during reproducibility.
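# The per-module entry written below has roughly this structure (sketch of the keys
# filled by get_constants_for_module above):
#   JNGFR01:
#     physical-detector-unit: <PDU name>
#     constants:
#       Offset: {file-path: ..., dataset-name: ..., creation-time: ...}
#       BadPixelsDark: {file-path: ..., dataset-name: ..., creation-time: ...}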
query_karabo_da = []
for mod in karabo_da:
if mod in retrieved_constants.keys():
print(f"Constant for {mod} already in "
"calibration_metadata.yml, won't query again.")
continue
query_karabo_da.append(mod)
with multiprocessing.Pool() as pool:
results = pool.map(get_constants_for_module, query_karabo_da)
```
%% Cell type:code id: tags:
``` python
timestamps = dict()
for md_dict, mod in results:
retrieved_constants[mod] = md_dict
module_timestamps = timestamps[mod] = dict()
module_constants = retrieved_constants[mod]
print(f"Module: {mod}:")
for cname, mdata in module_constants["constants"].items():
if hasattr(mdata["creation-time"], 'strftime'):
mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
print(f'{cname:.<12s}', mdata["creation-time"])
for cname in ["Offset", "BadPixelsDark", "RelativeGain", "BadPixelsFF"]:
if cname in module_constants["constants"]:
module_timestamps[cname] = module_constants["constants"][cname]["creation-time"]
else:
module_timestamps[cname] = "NA"
time_summary = retrieved_constants.setdefault("time-summary", {})
time_summary.update(timestamps)
metadata.save()
```
......
%% Cell type:code id: tags:
``` python
# Data selection parameters.
run = 104 # Run ID.
in_folder = '/gpfs/exfel/exp/SQS/202101/p002535/raw' # Partial input path appended with run ID.
out_folder = '/gpfs/exfel/exp/SQS/202101/p002535/scratch/cal_test' # Full path to output folder.
calib_config_path = '/gpfs/exfel/exp/SQS/202101/p002535/usr/config_board2+4.yaml' # Path to correction and transform configuration
# These parameters are required by xfel-calibrate but ignored in this notebook.
cycle = '' # Proposal cycle, currently not used.
cal_db_timeout = 0 # Calibration DB timeout, currently not used.
cal_db_interface = 'foo' # Calibration DB interface, currently not used.
karabo_da = 'bar' # Karabo data aggregator name, currently not used
# Output parameters.
karabo_id = 'SQS_REMI_DLD6' # Karabo device ID root for virtual output device.
proposal = '' # Proposal, leave empty for auto detection based on in_folder
out_filename = 'CORR-R{run:04d}-REMI01-S{sequence:05d}.h5' # Pattern for output filenames.
out_aggregator = 'REMI01' # Aggregator name for output files.
out_seq_len = 5000 # Number of trains per sequence file in output.
det_device_id = '{karabo_id}/DET/{det_name}' # Karabo device ID for virtual output device.
det_output_key = 'output' # Pipeline name for fast data output.
save_raw_triggers = True # Whether to save trigger position in files.
save_raw_edges = True # Whether to save digitized edge positions in files.
save_rec_signals = True # Whether to save reconstructed signals (u1-w2, mcp) in files.
save_rec_hits = True # Whether to save reconstructed hits (x,y,t,m) in files.
chunks_triggers = [500] # HDF chunk size for triggers.
chunks_edges = [500, 7, 50] # HDF chunk size for edges.
chunks_hits = [50, 50] # HDF chunk size for hits.
chunks_signals = [50, 50] # HDF chunk size for signals.
dataset_compression = 'gzip' # HDF compression method.
dataset_compression_opts = 3 # HDF GZIP compression level.
# Detector parameters.
quad_anode = False # Reconstruction assumes a hex anode by default, change for quad anodes.
# Trigger parameters.
ppt_source = 'SQS_RR_UTC/TSYS/TIMESERVER:outputBunchPattern'
ignore_ppl = False # Ignore any PPL entries in the PPT.
ppl_offset = 0 # In units of the PPT.
laser_ppt_mask = -1 # Bit mask for used laser, negative to auto-detect from instrument.
instrument_sase = 3
first_pulse_offset = 1000
single_pulse_length = 25000
# Parallelization parameters.
mp_pulse_info = 8 # Parallelization for pulse statistics.
mp_find_triggers = 0.5 # Parallelization for finding triggers.
mp_find_edges = 0.5 # Parallelization for digitizing analog signal.
mt_avg_trace = 2 # Parallelization for trace averaging.
mp_rec_hits = 1.0 # Parallelization for hit reconstruction.
```
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from threadpoolctl import threadpool_limits
import re
import h5py
from pathlib import Path
from datetime import datetime
import pasha as psh
from euxfel_bunch_pattern import indices_at_sase, indices_at_laser
from extra_data import RunDirectory
from extra_remi import Analysis, trigger_dt
from extra_remi.util import timing
from extra_remi.rd_resort import signal_dt, hit_dt
from extra_remi.files import DataFile, sequence_pulses
if quad_anode:
from extra_remi.plots import plot_detector_diagnostics_quad as plot_detector_diagnostics
else:
from extra_remi.plots import plot_detector_diagnostics_hex as plot_detector_diagnostics
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
calib_config_path = Path(calib_config_path)
if not calib_config_path.is_file():
# If the path cannot be resolved right now, try the same path relative to in_folder.
calib_config_path = Path(in_folder) / calib_config_path
if not calib_config_path.is_file():
# Disallow implicit config file creation.
raise ValueError('calib_config_path not found - neither absolute nor relative to in_folder')
remi = Analysis(calib_config_path, use_hex=not quad_anode)

with timing('open_run'):
    dc = remi.prepare_dc(RunDirectory(Path(in_folder) / f'r{run:04d}', inc_suspect_trains=True))
```
%% Cell type:markdown id: tags:
# Transformation parameters
%% Cell type:code id: tags:
``` python
def print_leaf(leaf, indent=0):
for key, value in leaf.items():
if isinstance(value, dict):
print(indent * 4 * ' ' + key)
print_leaf(value, indent=indent+1)
else:
print(indent * 4 * ' ' + f'{key}: {value}')
print_leaf(remi.tree)
```
%% Cell type:markdown id: tags:
# Pulse and trigger information
%% Cell type:markdown id: tags:
### Count pulses per train and compute offsets
%% Cell type:code id: tags:
``` python
# Based on the pulse pattern tables, three global variables are obtained:
#
# * `pulse_counts [int32: len(dc.train_ids)]` containing the number of pulses per train.
# * `pulse_offsets [int32: len(dc.train_ids)]` containing the global offset for the first pulse of each train.
# * `num_pulses = pulse_counts.sum(axis=0)`
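# As an illustrative example (assumed numbers, not from this run): pulse_counts = [3, 2, 4]
# gives pulse_offsets = [0, 3, 5] and num_pulses = 9, so the pulses of train i occupy the
# flat slice [pulse_offsets[i]:pulse_offsets[i] + pulse_counts[i]] in all pulse-indexed arrays.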
ppt_data = dc[ppt_source, 'data.bunchPatternTable']
def get_pulse_positions(ppt, sase, laser, ppl_offset):
# Combine FEL and PPL positions.
fel_pos = indices_at_sase(ppt, sase)
ppl_pos = indices_at_laser(ppt, laser) if not ignore_ppl else np.array([])
if len(fel_pos) > 0:
# Move PPL up to the FEL position.
ppl_pos += fel_pos[0] + ppl_offset
return np.union1d(fel_pos, ppl_pos), fel_pos, ppl_pos
if laser_ppt_mask < 0:
# If laser PPT mask is not specified, try to figure it out from device IDs.
from euxfel_bunch_pattern import PPL_BITS
instrument = karabo_id[:karabo_id.index('_')]
try:
laser_ppt_mask = PPL_BITS[f'LP_{instrument}']
except KeyError:
raise ValueError(f'Laser PPT mask unknown for instrument `{instrument}`')
psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))

# Build the pulse index
pulse_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint64)
has_ppt = psh.alloc(shape=len(dc.train_ids), dtype=bool, fill=False)
def count_pulses(wid, index, tid, ppt):
pulse_counts[index] = len(get_pulse_positions(ppt, instrument_sase, laser_ppt_mask, ppl_offset)[0])
has_ppt[index] = True
psh.map(count_pulses, ppt_data)
# Fill any missing values with the highest.
pulse_counts[has_ppt == False] = pulse_counts.max()
# Compute offsets based on pulse counts.
pulse_offsets = np.zeros_like(pulse_counts)
np.cumsum(pulse_counts[:-1], out=pulse_offsets[1:])
# Total number of pulses.
num_pulses = int(pulse_counts.sum())
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=1, ncols=1, nrows=1, figsize=(9, 4), clear=True)
ax.set_title('Pulse count')
ax.plot(dc.train_ids, pulse_counts, lw=1)
ax.set_xlabel('Train ID')
ax.set_ylabel('Number of pulses')
ax.set_ylim(0, max(300, pulse_counts.max() + 10))
ax.ticklabel_format(style='plain')
pass
```
%% Cell type:markdown id: tags:
### Find triggers
The trigger defines the boundary of a pulse on the digitizer trace, which is stored per train.
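Below is a minimal sketch (assumed, illustrative values only; the real entries are built in the next cell) of what a single trigger entry means.
%% Cell type:code id: tags:
``` python
# Minimal sketch (assumed values): a trigger selects the window of one pulse on the
# per-train digitizer trace; the fractional offset is kept for the time calibration.
import numpy as np

trace = np.random.normal(0, 1, size=200_000)                # hypothetical trace of one train
trigger = {'start': 12_000, 'stop': 37_000, 'offset': 0.3}  # hypothetical trigger entry
pulse_trace = trace[trigger['start']:trigger['stop']]       # samples belonging to this pulse
```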
%% Cell type:code id: tags:
``` python
# A trigger defines the boundary of a pulse on the digitizer trace stored per train. This cell creates a
# global variable:
#
# * `triggers [(start: int32, stop: int32, offset: float64, fel: bool, ppl: bool): num_pulses]`
#   containing the triggers for each pulse.
#
# This uses the pulse pattern table to locate the pulse positions on the trace. Only the number of pulses and
# their distance can be drawn this way, leaving the absolute offset for the very first pulse to be
# configured via `trigger/ppt/first_pulse_offset`. If a PPL is used, it will be included in the trigger
# pattern. The ppl_offset parameter allows taking into account an offset between PPL and FEL.
psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))
triggers = psh.alloc(shape=(num_pulses,), dtype=trigger_dt, fill=(-1, -1, np.nan, -1, 0, 0))

clock_factor = remi['digitizer']['clock_factor']
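# As an illustrative example (assumed numbers): with first_pulse_offset = 1000, clock_factor = 4
# and relative PPT positions rel_pos = [0, 24, 48], the pulse windows start at trace samples
# first_pulse_offset + rel_pos * 2 * clock_factor = [1000, 1192, 1384].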
def trigger_by_ppt(worker_id, index, train_id, ppt):
all_pos, fel_pos, ppl_pos = get_pulse_positions(ppt, instrument_sase, laser_ppt_mask, ppl_offset)
num_pulses = len(all_pos)
if num_pulses == 0:
return
elif len(ppl_pos) == 0 and ppl_offset < 0:
# No PPL pulses, but a negative offset is configured. This will cause
# first_pulse_offset to start early and most likely miss pulses at the
# end, so we correct by adding the ppl_offset to relative positions
# when computing trace positions.
pos_corr = abs(ppl_offset)
else:
pos_corr = 0
rel_pos = all_pos - all_pos[0]
if num_pulses > 1:
pulse_len = np.unique(rel_pos[1:] - rel_pos[:-1]).min()
elif num_pulses == 1:
pulse_len = single_pulse_length
start_frac = first_pulse_offset + (rel_pos + pos_corr) * 2 * clock_factor
start_int = start_frac.astype(int)
pulse_offset = pulse_offsets[index]
pulse_count = pulse_counts[index]
train_triggers = triggers[pulse_offset:pulse_offset+pulse_count]
train_triggers['start'] = start_int
train_triggers['stop'] = start_int + int(pulse_len * 2 * clock_factor) - 1
train_triggers['offset'] = start_frac - start_int
train_triggers['pulse'] = all_pos.astype(np.int16)
train_triggers['fel'] = [pos in fel_pos for pos in all_pos]
train_triggers['ppl'] = [pos in ppl_pos for pos in all_pos]
with timing('find_triggers'):
psh.map(trigger_by_ppt, ppt_data)
if (np.unique(triggers['pulse'][1:] - triggers['pulse'][:-1]) > 0).sum() > 1:
# There is more than one delta between pulse entries across all pulses. This is not
# necessarily a problem, as the pattern could simply have changed in between trains
# with each train being split properly.
# If there's more than one delta in a single train, this likely points to a mismatch
# of FEL and PPL repetition rate. This is most likely not intended.
one = np.uint64(1) # Because np.uint64 + int = np.float64
pulse_deltas = set()
for pulse_id, (offset, count) in enumerate(zip(pulse_offsets, pulse_counts)):
deltas = triggers['pulse'][offset+one:offset+count] - triggers['pulse'][offset:offset+count-one]
if len(np.unique(deltas)) > 1:
for delta in deltas:
pulse_deltas.add(delta)
if len(pulse_deltas) > 1:
delta_str = ', '.join([str(x) for x in sorted(pulse_deltas)])
print(f'WARNING: Different pulse lengths (PPT: {delta_str}) encountered within single trains, '
f'separated pulse spectra may split up signals!')
else:
print('WARNING: Different pulse lengths encountered across trains, separation may be unstable!')
```
%% Cell type:code id: tags:
``` python
fig, (lx, rx) = plt.subplots(num=2, ncols=2, nrows=1, figsize=(9, 4), clear=True,
gridspec_kw=dict(top=0.75))
# Display ~200 pulses or 5 trains, whichever is lower
n_trains = max(abs(pulse_offsets - 200).argmin(), 5)
visible_triggers = triggers[:pulse_offsets[n_trains]]
pulse_index = np.arange(len(visible_triggers))
pumped = visible_triggers['fel'] & visible_triggers['ppl']
fel_only = visible_triggers['fel'] & ~pumped
ppl_only = visible_triggers['ppl'] & ~pumped
lx.plot(pulse_index[pumped], visible_triggers[pumped]['start'], ' .', ms=3, c='C0', label='FEL+PPL')
lx.plot(pulse_index[fel_only], visible_triggers[fel_only]['start'], '.', ms=3, c='C1', label='FEL-only')
lx.plot(pulse_index[ppl_only], visible_triggers[ppl_only]['start'], '.', ms=2, c='C2', label='PPL-only')
max_start = visible_triggers['start'].max()
lx.vlines(pulse_offsets[:n_trains], 0, max_start, color='grey', linewidth=1, alpha=0.2)
lx.tick_params(right=True)
lx.set_xlabel('Pulse index')
lx.set_xlim(-15, pulse_offsets[n_trains]+15)
lx.set_ylabel('Trigger position')
lx.set_ylim(-max_start // 20, max_start + max_start // 20)
lx.legend(fontsize='small', loc='lower right')
train_lx = lx.twiny()
train_lx.set_xlabel('Train ID', labelpad=8)
train_lx.set_xlim(lx.get_xlim())
train_lx.set_xticks(pulse_offsets[:n_trains])
train_lx.set_xticklabels([str(int(x)) for x in dc.train_ids[:n_trains]],
rotation=-45, fontsize='x-small')
rx.plot(triggers['start'], lw=0.2)
rx.set_xlabel('Pulse index')
rx.tick_params(left=False, labelleft=False, right=True, labelright=True)
pass
```
%% Cell type:markdown id: tags:
# Analog signal to digital edges
%% Cell type:markdown id: tags:
### Find edges in analog signal
%% Cell type:code id: tags:
``` python
psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_edges))
threadpool_limits(limits=remi.get_num_workers(mt_avg_trace))
edges_by_det = {}
avg_traces_by_det = {}
for det_name, det in remi['detector'].items():
det_sourcekeys = remi.get_detector_sourcekeys(det_name)
det_get_traces = remi.get_traces_getter(det_name)
trace_len = dc[next(iter(det_sourcekeys))].entry_shape[0]
edges = psh.alloc(shape=(num_pulses, 7, det['max_hits']),
dtype=np.float64, fill=np.nan)
avg_traces = psh.alloc_per_worker(shape=(7, trace_len), dtype=np.float64)
def prepare_edge_worker(worker_id):
correct_func = remi.get_baseline_corrector()
discr_func, discr_params = remi.get_discriminator(det['channels'])
source_name = remi['digitizer']['source']
bl_start, bl_stop, _ = remi.get_baseline_limits(trace_len)
bl_sym = remi['digitizer']['baseline_symmetry']
time_cal = remi.get_time_calibration()
traces_corr = np.empty((7, trace_len), dtype=np.float64)
baselines = np.empty(bl_sym, dtype=np.float64)
yield
@psh.with_init(prepare_edge_worker)
def find_edges(worker_id, index, train_id, data):
try:
data = det_get_traces(data[source_name])
except KeyError:
return
for channel_idx in range(7):
correct_func(data[channel_idx], traces_corr[channel_idx],
baselines, bl_start, bl_stop)
avg_traces[worker_id] += traces_corr
pulses_slice = np.s_[pulse_offsets[index]:pulse_offsets[index]+pulse_counts[index]]
for trigger, pulse_edges in zip(triggers[pulses_slice], edges[pulses_slice]):
trigger_slice = np.s_[trigger['start']:trigger['stop']]
for trace, channel_params, channel_edges in zip(traces_corr, discr_params, pulse_edges):
discr_func(trace[trigger_slice], channel_edges, **channel_params)
pulse_edges += trigger['offset']
pulse_edges *= time_cal
with timing(f'find_edges, {det_name}'):
psh.map(find_edges, dc.select(det_sourcekeys))
edges_by_det[det_name] = edges
avg_traces_by_det[det_name] = avg_traces.sum(axis=0) / len(dc.train_ids)
with np.printoptions(precision=2, suppress=True):
print(edges[:5, :, :8])
```
%% Cell type:markdown id: tags:
### Global average of analog signals
%% Cell type:code id: tags:
``` python
for i, det_name in enumerate(remi['detector'].keys()):
fig, axs = plt.subplots(num=10+i, nrows=7, figsize=(9.5, 8), clear=True,
gridspec_kw=dict(left=0.1, right=0.98, top=0.98, bottom=0.1))
fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')
for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
axs[edge_idx].plot(avg_traces_by_det[det_name][edge_idx], lw=1)
axs[edge_idx].tick_params(labelbottom=False)
axs[edge_idx].set_ylabel(edge_name)
axs[-1].tick_params(labelbottom=True)
pass
```
%% Cell type:markdown id: tags:
### Sample for digitized traces
%% Cell type:code id: tags:
``` python
for i, det_name in enumerate(remi['detector'].keys()):
edges = edges_by_det[det_name]
fig = plt.figure(num=100+i, figsize=(9.5, 8))
grid = fig.add_gridspec(ncols=2, nrows=4, left=0.1, right=0.98, top=0.98, bottom=0.1)
fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')
for signal_idx, signal_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
row = (1 + signal_idx // 2) if signal_idx < 6 else 0
col = (signal_idx % 2) if signal_idx < 6 else np.s_[:]
ax = fig.add_subplot(grid[row, col])
finite_edges = np.isfinite(edges[:, signal_idx, 0])
if not finite_edges.any():
continue
pulse_idx = np.uint64(finite_edges.nonzero()[0][0])  # Is combined with other uint64 values below.
train_idx = (pulse_idx >= pulse_offsets).nonzero()[0][-1]
trigger = triggers[pulse_idx]
sourcekey = remi.get_channel_sourcekey(
remi['detector'][det_name]['channels'][signal_idx])
train_trace = dc[sourcekey].select_trains(np.s_[train_idx:train_idx+1]).ndarray()[0]
corr_trace = np.zeros_like(train_trace, dtype=np.float64)
remi.get_baseline_corrector()(
train_trace, corr_trace,
np.empty(remi['digitizer']['baseline_symmetry'], dtype=np.float64),
*remi.get_baseline_limits(len(train_trace))[:2])
pulse_trace = corr_trace[np.s_[trigger['start']:trigger['stop']]]
x_time = remi.get_time_calibration() * (np.arange(len(pulse_trace)) + trigger['offset'])
ax.plot(x_time, pulse_trace, lw=1)
ax.set_xlim(x_time[0], x_time[-1])
ax.set_ylim(-200, pulse_trace.max()*1.1)
ax.text(x_time[-1], pulse_trace.max(),
f'T{train_idx} P{pulse_idx - pulse_offsets[train_idx]} ',
va='top', ha='right')
ax.tick_params(labelbottom=False)
ax.set_ylabel(signal_name)
ax.vlines(edges[pulse_idx, signal_idx, :], *ax.get_ylim(), color='red', linewidth=1)
pass
```
%% Cell type:markdown id: tags:
### Digitized channel spectra
%% Cell type:code id: tags:
``` python
for i, det_name in enumerate(remi['detector'].keys()):
fig = plt.figure(num=20+i, figsize=(9.5, 6))
edges = edges_by_det[det_name]
min_edge = edges[np.isfinite(edges)].min()
max_edge = edges[np.isfinite(edges)].max()
grid = fig.add_gridspec(ncols=3, nrows=3, left=0.08, right=0.98, top=0.95, hspace=0.4)
fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')
numx = fig.add_subplot(grid[0, 0])
numx.set_title('Edges per pulse')
agg_window = num_pulses // 60
max_num_edges = 0.0
max_spectral_intensity = 0
hist_axs = []
for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):
num_edges = np.isfinite(edges[:, edge_idx, :]).sum(axis=1)
num_edges = num_edges[:((len(num_edges) // agg_window) * agg_window)]
num_edges = num_edges.reshape(-1, agg_window).mean(axis=1)
if edge_idx < 6:
plot_kwargs = dict(c=f'C{edge_idx}', ls='solid', lw=1.0)
else:
plot_kwargs = dict(c='k', ls='dashed', lw=1.0)
numx.plot(np.arange(len(num_edges)) * agg_window, num_edges, label=edge_name, **plot_kwargs)
max_num_edges = max(max_num_edges, num_edges.max())
cur_edges = edges[:, edge_idx, :].flatten()
if edge_idx < 6:
row = 1 + edge_idx % 2
col = edge_idx // 2
else:
row = 0
col = np.s_[1:3]
ax = fig.add_subplot(grid[row, col])
ax.set_title(f'TOF spectrum: {edge_name}')
y, _, _ = ax.hist(cur_edges[np.isfinite(cur_edges)], bins=int((max_edge - min_edge) // 5),
range=(min_edge, max_edge), color=plot_kwargs['c'], histtype='step', linewidth=1)
hist_axs.append(ax)
max_spectral_intensity = max(max_spectral_intensity, y.max())
numx.tick_params(labelbottom=False)
numx.set_ylim(0, 1.2*max_num_edges)
for ax in hist_axs:
ax.set_ylim(0, max_spectral_intensity*1.1)
ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3))
pass
```
%% Cell type:markdown id: tags:
# Detector diagnostics
%% Cell type:code id: tags:
``` python
for i, det_name in enumerate(remi['detector'].keys()):
edges = edges_by_det[det_name]
sort = remi.get_dld_sorter(det_name)
sum_shifts = sort.sum_shifts if sort.sum_shifts != (0.0, 0.0, 0.0) else None
is_valid = remi.get_presort_mask(edges, edge_idx=0, w=not quad_anode,
sum_limit=max(sort.uncorrected_time_sum_half_widths),
sum_shifts=sum_shifts)
signals, sums = remi.get_signals_and_sums(edges, indices=sort.channel_indices, sum_shifts=sum_shifts,
mask=is_valid)
fig = plot_detector_diagnostics(signals=signals, sums=sums, fig_num=30+i, im_scale=1.5,
sum_range=max(sort.uncorrected_time_sum_half_widths),
sorter=sort)
fig.text(0.02, 0.98, det_name.upper() + ' before corrections', rotation=90, ha='left', va='top', size='x-large')
if remi['detector'][det_name]['use_sum_correction'] or remi['detector'][det_name]['use_pos_correction']:
n_masked = is_valid.sum()
signals = np.full((n_masked, 3), np.nan, dtype=np.float64)
sums = np.full((n_masked, 3), np.nan, dtype=np.float64)
sort.correct(edges[is_valid], signals, sums)
fig = plot_detector_diagnostics(signals=signals, sums=sums, fig_num=40+i, im_scale=1.5,
sum_range=max(sort.uncorrected_time_sum_half_widths),
sorter=sort)
fig.text(0.02, 0.98, det_name.upper() + ' after corrections', rotation=90, ha='left', va='top', size='x-large')
pass
```
%% Cell type:markdown id: tags:
# Hit reconstruction
%% Cell type:code id: tags:
``` python
psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_rec_hits))
signals_by_det = {}
hits_by_det = {}
hit_counts_by_det = {}
for det_name, det in remi['detector'].items():
edges = edges_by_det[det_name]
signals = psh.alloc(shape=(num_pulses, 50), dtype=signal_dt, fill=np.nan)
hits = psh.alloc(shape=(num_pulses, 50), dtype=hit_dt, fill=(np.nan, np.nan, np.nan, -1))
hit_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint32)
def prepare_hit_worker(worker_id):
sort = remi.get_dld_sorter(det_name)
yield
@psh.with_init(prepare_hit_worker)
def reconstruct_hits(worker_id, index, train_id):
hit_counts[index] += sort.run_on_train(
edges, signals, hits, pulse_offsets[index], pulse_offsets[index] + pulse_counts[index])
with timing(f'rec_hits, {det_name}'):
psh.map(reconstruct_hits, dc.train_ids)
signals_by_det[det_name] = signals
hits_by_det[det_name] = hits
hit_counts_by_det[det_name] = hit_counts
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=50+i, figsize=(9.5, 4), ncols=1, clear=True,
gridspec_kw=dict(top=0.92, right=0.98, left=0.05, bottom=0.12))
max_num_hits = 0.0
for det_name in remi['detector'].keys():
agg_window = num_pulses // 1000
num_hits = np.isfinite(hits_by_det[det_name]['x']).sum(axis=1)
num_hits = num_hits[:(len(num_hits) // agg_window) * agg_window]
num_hits = num_hits.reshape(-1, agg_window).mean(axis=1)
max_num_hits = max(max_num_hits, num_hits.max())
ax.plot(np.arange(0, (num_pulses // agg_window) * agg_window, agg_window), num_hits,
lw=1, label=det_name.upper())
ax.set_title('Hits per pulse')
ax.set_xlabel('Pulse index')
ax.set_ylim(0, max_num_hits*1.1)
ax.legend()
pass
```
%% Cell type:markdown id: tags:
### Reconstruction methods
Each hit may be reconstructed by one of 19 different methods. These differ by the number of real signals across the channels, which could be combined to form the hit. Each of these methods is designated by a number between `0` and `19` (with empty hits using `-1`), which can be found in the `m` key of a hit, e.g.:
* `0`: All six anode signals and the corresponding MCP signal were found.
* `4`: One signal on layer `u` is missing, all other signals for this event were found.
* `18`: Only one anode signal on each layer was found and the MCP signal is missing. There is no way to check whether this combination of signals is actually valid.
| Method | `u+v+w +mcp` |
| - | - |
| 0 | `2+2+2 +1` |
| 1 | `0+2+2 +1` |
| 2 | `2+0+2 +1` |
| 3 | `2+2+0 +1` |
| 4 | `1+2+2 +1` (2 permutations) |
| 5 | `2+1+2 +1` (2 permutations) |
| 6 | `2+2+1 +1` (2 permutations) |
| 7 | `2+2+2 +0` |
| 8 | `0+2+2 +0` |
| 9 | `2+0+2 +0` |
| 10 | `2+2+0 +0` |
| 11 | `1+2+2 +0` (2 permutations) |
| 12 | `2+1+2 +0` (2 permutations) |
| 13 | `2+2+1 +0` (2 permutations) |
| 14 | `2+1+1 +1` `1+2+1 +1` `1+1+2 +1` (12 permutations) |
| 15 | `2+1+0 +1` `2+0+1 +1` `1+2+0 +1` `1+0+2 +1` `0+2+1 +1` `0+1+2 +1` (12 permutations) |
| 16 | `1+1+1 +1` (8 permutations) |
| 17 | `2+1+1 +0` `1+2+1 +0` `1+1+2 +0` (12 permutations) |
| 18 | `1+1+1 +0` (8 permutations) |
| 19 | `2+1+0 +0` `2+0+1 +0` `1+2+0 +0` `1+0+2 +0` `0+1+2 +0` `0+2+1 +0` (12 permutations) |
* For hits reconstructed with method `> 10`, extra attention should be given to ensure they add meaningful signal.
* Any method `> 14` has to be considered risky, because neither a time sum nor the position can be checked. If the scale factors and/or `w` shift are not correct, then the number of events reconstructed with the risky methods will increase. They will most likely be *ghost hits*, which do not correspond to actual impacts on the detector.
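Below is a minimal sketch (an assumption for illustration, with made-up values) of how such a risky-method cut could be expressed; the next cell computes the actual per-method statistics.
%% Cell type:code id: tags:
``` python
# Minimal sketch (assumed, illustrative values only): count hits reconstructed with a
# "risky" method (m > 14).
import numpy as np

m_values = np.array([0, 4, 7, 16, 18, -1])   # hypothetical 'm' values of a few hits
valid = m_values >= 0
risky = m_values > 14
print(f'{risky.sum()} of {valid.sum()} valid hits use a risky method')
```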
%% Cell type:code id: tags:
``` python
for i, det_name in enumerate(remi['detector'].keys()):
hits = hits_by_det[det_name]
fig, ax = plt.subplots(num=60+i, figsize=(9.5, 5), ncols=1, clear=True,
gridspec_kw=dict(left=0.08, right=0.91, top=0.8))
fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')
method_bins = np.bincount(hits['m'][hits['m'] >= 0], minlength=20)
ax.bar(np.arange(20), method_bins, width=0.5)
ax.set_xlabel('Reconstruction method')
ax.set_xlim(-0.5, 19.5)
ax.set_xticks(np.arange(20))
ax.set_ylabel('Number of hits')
ax.set_ylim(0, method_bins.max()*1.05)
ylims = ax.get_ylim()
ax.tick_params(which='both', right=True, labelright=True)
num_risky = method_bins[15:].sum()
num_total = method_bins.sum()
ax.text(14.2, method_bins.max(), f'{(100*(num_total-num_risky)/num_total):.2g}%',
va='top', ha='right', color='black')
ax.text(14.8, method_bins.max(), f'{(100*num_risky/num_total):.2g}%',
va='top', ha='left', color='red')
ax.fill([14.5, 19.5, 19.5, 14.5], [ylims[0], ylims[0], ylims[1], ylims[1]], c='r', alpha=0.2)
labelx = ax.twiny()
labelx.set_xlim(*ax.get_xlim())
labelx.set_xticks(ax.get_xticks())
labelx.set_xticklabels([
'2+2+2 +1',
'0+2+2 +1', '2+0+2 +1', '2+2+0 +1',
'1+2+2 +1', '2+1+2 +1', '2+2+1 +1',
'2+2+2 +0',
'0+2+2 +0', '2+0+2 +0', '2+2+0 +0', '1+2+2 +0', '2+1+2 +0', '2+2+1 +0',
'2+1+1 +1',
'2+1+0 +1',
'1+1+1 +1',
'2+1+1 +0',
'1+1+1 +0',
'2+1+0 +0',
], rotation=90)
min_rel_tick = np.ceil((ax.get_ylim()[0] / num_total) / 0.1) * 0.1
max_rel_tick = np.floor((method_bins.max() / num_total) / 0.1) * 0.1
rely = ax.twinx()
rely.set_ylim(*ax.get_ylim())
rely.set_yticks(np.arange(0.0, max_rel_tick+0.01, 0.1)*num_total)
rely.set_yticks(np.arange(0.0, ylims[1]/num_total, 0.02)*num_total, minor=True)
rely.set_yticklabels([f'{(y/num_total)*100:.0f}%' for y in rely.get_yticks()])
rely.set_ylabel('Percentage of total hits')
pass
```
%% Cell type:markdown id: tags:
### Detector image and fishes
%% Cell type:code id: tags:
``` python
for i, det_name in enumerate(remi['detector'].keys()):
flat_hits = hits_by_det[det_name].reshape(-1)
flat_hits = flat_hits[np.isfinite(flat_hits[:]['x'])]
flat_hits = flat_hits[flat_hits['m'] < 10]
fig = plt.figure(num=70+i, figsize=(9, 13.5))
fig.text(0.02, 0.02, det_name.upper(), rotation=90, ha='left', va='bottom', size='x-large')
imp = fig.add_axes([0.1 + 0.25/2, 0.56, 0.6, 0.4])
txp = fig.add_axes([0.1, 0.28, 0.85, 0.22])
typ = fig.add_axes([0.1, 0.04, 0.85, 0.22])
im_radius = remi['detector'][det_name]['mcp_radius']*1.1
min_tof = flat_hits['t'].min()
max_tof = flat_hits['t'].max()
imp.hist2d(flat_hits['x'], flat_hits['y'], bins=(256, 256),
range=[[-im_radius, im_radius], [-im_radius, im_radius]], norm=LogNorm())
imp.xaxis.set_label_position('top')
imp.set_xlabel('X / mm')
imp.set_ylabel('Y / mm')
imp.tick_params(right=True, labelright=True, top=True, labeltop=True)
imp.grid()
for ax, dim_label in zip([txp, typ], ['x', 'y']):
ax.hist2d(flat_hits['t'], flat_hits[dim_label], bins=(int((max_tof - min_tof) // 5), 256),
range=[[min_tof, max_tof], [-im_radius, im_radius]], norm=LogNorm())
ax.set_ylabel(f'{dim_label.upper()} / mm')
typ.set_xlabel('Time-of-flight / ns')
txp.tick_params(bottom=True, labelbottom=False, top=True, labeltop=True, right=True, labelright=True)
typ.tick_params(right=True, labelright=True, top=True)
pass
```
%% Cell type:markdown id: tags:
# Transformed data files
%% Cell type:code id: tags:
``` python
seq_len = out_seq_len if out_seq_len > 0 else len(dc.files[0].train_ids)
dataset_kwargs = {k[8:]: v for k, v in locals().items() if k.startswith('dataset_compression')}

Path(out_folder).mkdir(parents=True, exist_ok=True)

print('Writing sequence files', flush=True, end='')

for seq_id, train_mask, pulse_mask in sequence_pulses(dc.train_ids, pulse_counts, pulse_offsets, seq_len):
    seq_train_ids = dc.train_ids[train_mask]

    with DataFile.from_details(out_folder, out_aggregator, run, seq_id) as outp:
        outp.create_index(seq_train_ids)

        for det_name in remi['detector']:
            cur_device_id = det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())

            cur_control_data = outp.create_control_source(cur_device_id)
            # Manually manipulate the file here, still creates the index properly.
            remi.attach_detector_config(det_name, cur_control_data.get_run_group())
            cur_control_data.create_index(len(seq_train_ids))

            cur_fast_data = outp.create_instrument_source(f'{cur_device_id}:{det_output_key}')

            if save_raw_triggers:
                cur_fast_data.create_key('raw.triggers', triggers[pulse_mask],
                                         chunks=tuple(chunks_triggers), **dataset_kwargs)

            if save_raw_edges:
                cur_fast_data.create_key('raw.edges', edges_by_det[det_name][pulse_mask],
                                         chunks=tuple(chunks_edges), **dataset_kwargs)

            if save_rec_signals:
                cur_fast_data.create_key('rec.signals', signals_by_det[det_name][pulse_mask],
                                         chunks=tuple(chunks_signals), **dataset_kwargs)

            if save_rec_hits:
                cur_fast_data.create_key('rec.hits', hits_by_det[det_name][pulse_mask],
                                         chunks=tuple(chunks_hits), **dataset_kwargs)

            cur_fast_data.create_index(raw=pulse_counts[train_mask], rec=pulse_counts[train_mask])

        outp.create_metadata(like=dc)

    print('.', flush=True, end='')

print('')
```
......
%% Cell type:markdown id: tags:
# ePix100 Dark Characterization
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides dark image analysis and calibration constants of the ePix100 detector.
Dark characterization evaluates offset and noise of the detector and gives information about bad pixels.
Noise and bad pixel maps are calculated independently for each of the 4 ASICs of ePix100, since their noise behaviour can be significantly different.
Common mode correction can be applied to increase sensitivity to noise-related bad pixels. Common mode correction is achieved by subtracting the median of all pixels that are read out at the same time along a row/column. This is done in an iterative process, by which a new bad pixels map is calculated and used to mask data as the common mode values across the rows/columns are updated.
Resulting maps are saved as .h5 files for later use and injected into the calibration DB.
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/HED/202201/p002804/raw' # input folder, required
out_folder = '' # output folder, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequence = 0 # sequence file to use
run = 281 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = ["EPIX01"] # data aggregators
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters for the calibration database.
use_dir_creation_date = True
cal_db_interface = "tcp://max-exfl016:8020" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # Output constants to the calibration database
local_output = True # Output constants locally
# Conditions used for injected calibration constants.
bias_voltage = 200 # Bias voltage
in_vacuum = False # Detector operated in vacuum
fix_integration_time = -1 # Integration time. Set to -1 to read from .h5 file
fix_temperature = -1 # Fixed temperature in Kelvin. Set to -1 to read from .h5 file
temp_limits = 5 # Limit for parameter Operational temperature
badpixel_threshold_sigma = 5. # Bad pixels defined by values outside n times this std from median. Default: 5
CM_N_iterations = 2 # Number of iterations for common mode correction. Set to 0 to skip it
# Parameters used during selecting raw data trains.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
max_trains = 1000 # Maximum number of trains to use for processing dark constants. Set to 0 to use all available trains.
# Don't delete! myMDC sends this by default.
operation_mode = '' # Detector operation mode, optional
# TODO: delete after removing from calibration_configurations
db_module = '' # ID of module in calibration database, this parameter is ignored in the notebook. TODO: remove from calibration_configurations.
```
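%% Cell type:markdown id: tags:
A minimal sketch of one common mode iteration as described in the introduction (an illustration with assumed names and shapes, not the notebook's actual implementation): the median of the good pixels read out along each row is subtracted from that row; the same can be done along columns and repeated `CM_N_iterations` times with an updated bad pixel mask.
%% Cell type:code id: tags:
``` python
# Minimal sketch (assumed shapes/names): one row-wise common mode pass on a dark frame.
import numpy as np

frame = np.random.normal(100, 5, size=(708, 768))   # hypothetical dark frame (rows x columns)
bad_pixels = np.zeros(frame.shape, dtype=bool)       # hypothetical bad pixel mask (True = bad)

good = np.where(bad_pixels, np.nan, frame)
row_cm = np.nanmedian(good, axis=1, keepdims=True)   # common mode value per row
frame_cm = frame - row_cm                            # row-wise common mode removed
```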
%% Cell type:code id: tags:
``` python
import os
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
from IPython.display import display, Markdown
from prettytable import PrettyTable
from extra_data import RunDirectory
from pathlib import Path
import XFELDetAna.xfelprofiler as xprof
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from XFELDetAna.plotting.util import prettyPlotting
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.epix100 import epix100lib
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
warnings.filterwarnings('ignore')
prettyPlotting = True
profiler = xprof.Profiler()
profiler.disable()
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = f'proposal:{proposal} runs:{run}'
report = get_report(metadata_folder)
ped_dir = os.path.join(in_folder, f"r{run:04d}")
fp_name = path_template.format(run, karabo_da[0]).format(sequence)
run_dir = RunDirectory(ped_dir)
print(f"Reading from: {Path(ped_dir) / fp_name}")
print(f"Run number: {run}")
print(f"Instrument H5File source: {instrument_src}")
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time.isoformat()} as creation time")
os.makedirs(out_folder, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Read sensor size
sensor_size = run_dir[instrument_src, 'data.image.dims'].as_single_value(reduce_by='first') # (x=768, y=708) expected
sensor_size = sensor_size[sensor_size != 1] # data.image.dims for old data is [768, 708, 1]
assert np.array_equal(sensor_size, [768, 708]), 'Unexpected sensor dimensions.'
# Path to pixels ADC values
pixels_src = (instrument_src, "data.image.pixels")
# Specifies total number of images to process
n_trains = run_dir.get_data_counts(*pixels_src).shape[0]
# Modify n_trains to process based on the given maximum
# and minimum number of trains.
if max_trains:
n_trains = min(max_trains, n_trains)
if n_trains < min_trains:
raise ValueError(
f"Less than {min_trains} trains are available in RAW data."
" Not enough data to process darks.")
print(f"Number of dark images to analyze: {n_trains}")
```
%% Cell type:code id: tags:
``` python
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dir,
instrument_src=instrument_src,
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
# Read integration time
if fix_integration_time == -1:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ''
else:
integration_time = fix_integration_time
integration_time_str_add = '(manual input)'
# Read temperature
if fix_temperature == -1:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ''
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = '(manual input)'
# Print operating conditions
print(f"Bias voltage: {bias_voltage} V")
print(f"Detector integration time: {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Passing repetitive code along the notebook to a function
def stats_calc(data):
    '''
    Calculates basic statistical parameters of the input data:
    mean, standard deviation, median, minimum and maximum.

    Returns dictionary with keys: 'mean', 'std', 'median',
    'min', 'max' and 'legend'.

    Parameters
    ----------
    data : ndarray
        Data to analyse.
    '''
    stats = {}
    stats['mean'] = np.nanmean(data)
    stats['std'] = np.nanstd(data)
    stats['median'] = np.nanmedian(data)
    stats['min'] = np.nanmin(data)
    stats['max'] = np.nanmax(data)
    stats['legend'] = ('mean: ' + str(np.round(stats['mean'], 2)) +
                       '\nstd: ' + str(np.round(stats['std'], 2)) +
                       '\nmedian: ' + str(np.round(stats['median'], 2)) +
                       '\nmin: ' + str(np.round(stats['min'], 2)) +
                       '\nmax: ' + str(np.round(stats['max'], 2)))

    return stats
```
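%% Cell type:markdown id: tags:
As an illustration (assumed random data, not part of the original analysis), `stats_calc` can be combined with `badpixel_threshold_sigma` to flag pixels whose values deviate from the median by more than that many standard deviations:
%% Cell type:code id: tags:
``` python
# Minimal sketch (assumed data): flag outliers of a constant map using stats_calc.
import numpy as np

values = np.random.normal(100, 5, size=(708, 768))   # hypothetical constant map (e.g. noise)
stats = stats_calc(values)
bad = np.abs(values - stats['median']) > badpixel_threshold_sigma * stats['std']
print(f'{bad.sum()} pixels flagged as bad')
```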
%% Cell type:code id: tags:
``` python
# Create and fill offset and noise maps
step_timer = StepTimer()
step_timer.start()
# Read data
data_dc = run_dir.select(
*pixels_src, require_all=True).select_trains(np.s_[:n_trains])
data = data_dc[pixels_src].ndarray()
# Instantiate constant maps to be filled with offset, noise and bad pixels maps
constant_maps = {}
step_timer.done_step('Darks loaded. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Offset Map
%% Cell type:code id: tags:
``` python
step_timer.start()
# Calculate offset per pixel and store it in constant_maps
constant_maps['Offset'] = np.nanmean(data, axis=0)[..., np.newaxis]
# Calculate basic statistical parameters
stats = stats_calc(constant_maps['Offset'].flatten())
# Offset map
fig = xana.heatmapPlot(
constant_maps['Offset'].squeeze(),
lut_label='[ADU]',
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1]),
vmin=max(0, np.round((stats['median']-stats['std'])/250)*250), # Force cb label to be multiple of 250 for reproducibility
vmax=min(np.power(2,14)-1, np.round((stats['median']+stats['std'])/250)*250)
)
fig.suptitle('Offset Map', x=.48, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
# Offset Histogram
h, c = np.histogram(
    constant_maps['Offset'].flatten(),
    bins=np.arange(stats['min'], stats['max'], stats['std']/100)
)

d = {'x': c[:-1],
     'y': h,
     'color': 'blue'
     }
fig = xana.simplePlot(
d,
aspect=1.5,
x_label='Offset [ADU]',
y_label='Counts',
x_range=(0, np.power(2,14)-1),
y_range=(0, max(h)*1.1),
y_log=True
)
fig.text(
    s=stats['legend'],
    x=.7,
    y=.7,
    fontsize=14,
    bbox=dict(facecolor='yellow', edgecolor='black', alpha=.1)
)

fig.suptitle('Offset Map Histogram', x=.5, y=.92, fontsize=16)
step_timer.done_step('Offset Map created. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Noise Map
%% Cell type:code id: tags:
``` python
#************** NOISE HEAT MAP **************#
step_timer.start()
# Calculate noise per pixel and store it in constant_maps
constant_maps['Noise'] = np.nanstd(data, axis=0)[..., np.newaxis]
# Calculate basic statistical parameters
stats = stats_calc(constant_maps['Noise'].flatten())
# Noise heat map
fig = xana.heatmapPlot(
    constant_maps['Noise'].squeeze(),
    lut_label='[ADU]',
    x_label='Column',
    y_label='Row',
    x_range=(0, sensor_size[0]),
    y_range=(0, sensor_size[1]),
    vmin=max(0, np.round(stats['median'] - badpixel_threshold_sigma*stats['std'])),
    vmax=np.round(stats['median'] + badpixel_threshold_sigma*stats['std'])
)
fig.suptitle('Noise Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
# Calculate overall noise histogram
bins = np.arange(max(0, stats['mean'] - badpixel_threshold_sigma*stats['std']),
                 stats['mean'] + badpixel_threshold_sigma*stats['std'],
                 stats['std']/100)
h, c = np.histogram(
    constant_maps['Noise'].flatten(),
    bins=bins
)
d = [{'x': c[:-1],
'y': h,
'color': 'black',
'label': 'Total'
}]
# Calculate per ASIC histogram
asic = []
asic.append({'noise': constant_maps['Noise'][:int(sensor_size[1]//2), int(sensor_size[0]//2):],
'label': 'ASIC 0 (bottom right)',
'color': 'blue'})
asic.append({'noise': constant_maps['Noise'][int(sensor_size[1]//2):, int(sensor_size[0]//2):],
'label': 'ASIC 1 (top right)',
'color': 'red'})
asic.append({'noise': constant_maps['Noise'][int(sensor_size[1]//2):, :int(sensor_size[0]//2)],
'label': 'ASIC 2 (top left)',
'color': 'green'})
asic.append({'noise': constant_maps['Noise'][:int(sensor_size[1]//2), :int(sensor_size[0]//2)],
'label': 'ASIC 3 (bottom left)',
'color': 'magenta'})
min_std = np.inf
for a in asic:
h, c = np.histogram(a['noise'].flatten(), bins=bins)
d.append({'x': c[:-1], 'y': h, 'color': a['color'], 'label': a['label']})
min_std = np.nanmin((min_std, np.nanstd(a['noise'])))
print(a['label'][:6] +
': median = ' + "{:.2f}".format(np.round(np.nanmedian(a['noise']),2)) +
' | std = ' + "{:.2f}".format(np.round(np.nanstd(a['noise']),2)) +
a['label'][6:])
arg_max_median = 0
for i in range(0,np.size(d)):
arg_max_median = np.max((arg_max_median, np.argmax(d[i]['y'])))
# Plot noise histogram
fig = xana.simplePlot(
    d,
    aspect=1.5,
    x_label='Noise [ADU]',
    y_label='Counts',
    x_range=(max(0, stats['median'] - badpixel_threshold_sigma*stats['std']),
             stats['median'] + badpixel_threshold_sigma*stats['std']),
    y_range=(0, max(d[0]['y'])*1.1),
)
plt.grid(linestyle=':')
leg = fig.legend(bbox_to_anchor=(.42, .88), fontsize=14)

fig.text(
    s=stats['legend'],
    x=.75,
    y=.7,
    fontsize=14,
    bbox=dict(facecolor='yellow', edgecolor='black', alpha=.1)
)
fig.suptitle('Noise Map Histogram', x=.5, y=.92, fontsize=16)
step_timer.done_step('\nNoise Map created. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Bad Pixels
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) against the median value of the respective maps for each ASIC ($v_k$):
$$
v_i > \mathrm{median}(v_{k}) + n \sigma_{v_{k}}
$$
or
$$
v_i < \mathrm{median}(v_{k}) - n \sigma_{v_{k}}
$$
Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:
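As a short sketch (using the `BadPixels` flags already available in this notebook), a pixel failing more than one criterion simply has the corresponding bits OR-ed together:
``` python
# Illustrative only: combine two dark-related flags for a single pixel
flags = BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD
print(f'{flags.value:032b}')  # both corresponding bits are set in the 32 bit mask
```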
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
    '''
    Prints bad pixels bit encoding.

    Parameters
    ----------
    bp : enum 'BadPixels'
    '''
    print('{:<23s}: {:032b} ({})'.format(bp.name, bp.value, bp.real))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
```
%% Cell type:code id: tags:
``` python
def eval_bpidx(const_map, n_sigma, block_size):
    '''
    Evaluates bad pixels by comparing the offset and noise of each
    pixel against the median value of the respective map of each ASIC.

    Returns boolean array.

    Parameters
    ----------
    const_map : ndarray
        Offset or noise constant map to input.
    n_sigma : float
        Standard deviation multiplicity interval outside
        which bad pixels are defined.
    block_size : ndarray
        Dimensions ([x, y]) of each ASIC.
    '''
    blocks = {}  # Each block corresponds to 1 ASIC
    blocks['0'] = const_map[:int(block_size[1]), int(block_size[0]):]  # bottom right
    blocks['1'] = const_map[int(block_size[1]):, int(block_size[0]):]  # top right
    blocks['2'] = const_map[int(block_size[1]):, :int(block_size[0])]  # top left
    blocks['3'] = const_map[:int(block_size[1]), :int(block_size[0])]  # bottom left

    idx = {}
    for b in range(len(blocks)):
        mdn = np.nanmedian(blocks[str(b)])
        std = np.nanstd(blocks[str(b)])
        idx[str(b)] = ((blocks[str(b)] > mdn + n_sigma*std)
                       | (blocks[str(b)] < mdn - n_sigma*std))

    idx_output = np.zeros(const_map.shape, dtype=bool)
    idx_output[:int(block_size[1]), int(block_size[0]):] = idx['0']
    idx_output[int(block_size[1]):, int(block_size[0]):] = idx['1']
    idx_output[int(block_size[1]):, :int(block_size[0])] = idx['2']
    idx_output[:int(block_size[1]), :int(block_size[0])] = idx['3']

    return idx_output
```
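%% Cell type:markdown id: tags:
For illustration only (not part of the dark characterization itself), `eval_bpidx` can be exercised on a toy map: an 8×8 array treated as four 4×4 ASIC blocks, where a single outlier is flagged within its own block.
%% Cell type:code id: tags:
``` python
# Hypothetical toy example; in this notebook the function is applied to
# the full-size Offset and Noise constant maps instead.
toy = np.ones((8, 8))
toy[0, 7] = 100.  # outlier in the bottom-right block
print(eval_bpidx(toy, n_sigma=3, block_size=np.array([4, 4])))
```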
%% Cell type:code id: tags:
``` python
def bp_analysis_table(bad_pixels_map, title=''):
    '''
    Prints a table with a short analysis of the number and
    percentage of bad pixels on count of offset or noise
    out of threshold, or evaluation error.

    Returns PrettyTable.

    Parameters
    ----------
    bad_pixels_map : ndarray
        Bad pixel map to analyse.
    title : string, optional
        Table title to be used.
    '''
    num_bp = np.sum(bad_pixels_map != 0)
    offset_bp = np.sum(bad_pixels_map == 1)
    noise_bp = np.sum(bad_pixels_map == 2)
    eval_error_bp = np.sum(bad_pixels_map == 4)

    t = PrettyTable()
    t.field_names = [title]
    t.add_row(['Total number of Bad Pixels : {:<5} ({:<.2f}%)'.format(num_bp, 100*num_bp/np.prod(bad_pixels_map.shape))])
    t.add_row(['  Offset out of threshold  : {:<5} ({:<.2f}%)'.format(offset_bp, 100*offset_bp/np.prod(bad_pixels_map.shape))])
    t.add_row(['  Noise out of threshold   : {:<5} ({:<.2f}%)'.format(noise_bp, 100*noise_bp/np.prod(bad_pixels_map.shape))])
    t.add_row(['  Evaluation error         : {:<5} ({:<.2f}%)'.format(eval_error_bp, 100*eval_error_bp/np.prod(bad_pixels_map.shape))])

    return t
```
%% Cell type:code id: tags:
``` python
#************** BAD PIXELS HEAT MAP **************#
# Add bad pixels from darks to constant_maps
constant_maps['BadPixelsDark'] = np.zeros(constant_maps['Offset'].shape, np.uint32)
# Find noise related bad pixels
constant_maps['BadPixelsDark'][eval_bpidx(constant_maps['Noise'], badpixel_threshold_sigma, sensor_size//2)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
constant_maps['BadPixelsDark'][~np.isfinite(constant_maps['Noise'])] = BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Find offset related bad pixels
constant_maps['BadPixelsDark'][eval_bpidx(constant_maps['Offset'], badpixel_threshold_sigma, sensor_size//2)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
constant_maps['BadPixelsDark'][~np.isfinite(constant_maps['Offset'])] = BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Plot Bad Pixels Map
fig = xana.heatmapPlot(
np.exp(np.nan_to_num(np.log(constant_maps['BadPixelsDark']), neginf=np.nan)).squeeze(),  # convert zeros to NaN
lut_label='Bad pixel value [ADU]',  # for plotting purposes
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1])
)
fig.suptitle('Initial Bad Pixels Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
step_timer.done_step('Initial Bad Pixels Map created. Elapsed Time')
print(bp_analysis_table(constant_maps['BadPixelsDark'], title='Initial Bad Pixel Analysis'))
```
%% Cell type:markdown id: tags:
## Common Mode Correction
Common mode correction is applied here to increase sensitivity to bad pixels. This is done in an iterative process. Each iteration is composed of the following steps:
1. Common mode noise is calculated and subtracted from data.
2. Noise map is recalculated.
3. Bad pixels are recalculated based on the new noise map.
4. Data is masked based on the new bad pixels map.
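A minimal numpy sketch of a single iteration is shown below. It is only illustrative and ignores the per-ASIC block structure; the cells that follow use `xcal.CommonModeCorrection` instead, and the names follow this notebook.
``` python
# Illustrative sketch only, assuming `data` has shape (n_images, rows, columns),
# is float and has already been offset-subtracted.
masked = np.where(constant_maps['BadPixelsDark'].squeeze() != 0, np.nan, data)  # mask known bad pixels
col_cm = np.nanmedian(masked, axis=1, keepdims=True)            # common mode of each column
row_cm = np.nanmedian(masked - col_cm, axis=2, keepdims=True)   # common mode of each row
corrected = masked - col_cm - row_cm                            # 1. subtract common mode
noise_map_corrected = np.nanstd(corrected, axis=0)              # 2. recalculate noise map
# 3./4. bad pixels are then re-evaluated from the new noise map and re-masked
```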
%% Cell type:code id: tags:
``` python
if CM_N_iterations < 1:
print('Common mode correction not applied.')
else:
commonModeBlockSize = sensor_size//2
# Instantiate common mode calculators for column and row CM correction
cmCorrection_col = xcal.CommonModeCorrection(
sensor_size.tolist(),
commonModeBlockSize.tolist(),
'col',
parallel=False)
cmCorrection_row = xcal.CommonModeCorrection(
sensor_size.tolist(),
commonModeBlockSize.tolist(),
'row',
parallel=False)
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
data = data.astype(float) # This conversion is needed for offset subtraction
# Subtract Offset
data -= constant_maps['Offset'].squeeze()
noise_map_corrected = np.copy(constant_maps['Noise'])
bp_offset = [np.sum(constant_maps['BadPixelsDark']==1)]
bp_noise = [np.sum(constant_maps['BadPixelsDark']==2)]
for it in range (0,CM_N_iterations):
step_timer.start()
# Mask bad pixels
BadPixels_mask = np.squeeze(constant_maps['BadPixelsDark'] != 0)
BadPixels_mask = np.repeat(BadPixels_mask[np.newaxis,...],data.shape[0],axis=0)
data[BadPixels_mask] = np.nan
# Common mode correction
data = np.swapaxes(data,0,-1)
data = cmCorrection_col.correct(data)
data = cmCorrection_row.correct(data)
data = np.swapaxes(data,0,-1)
# Update noise map
noise_map_corrected = np.nanstd(data, axis=0)[..., np.newaxis]
# Update bad pixels map
constant_maps['BadPixelsDark'][eval_bpidx(noise_map_corrected, badpixel_threshold_sigma, sensor_size//2)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp_offset.append(np.sum(constant_maps['BadPixelsDark']==1))
bp_noise.append(np.sum(constant_maps['BadPixelsDark']==2))
print(bp_analysis_table(constant_maps['BadPixelsDark'], title=f'{it+1} CM correction iterations'))
step_timer.done_step('Elapsed Time')
print('\n')
# Apply final bad pixels mask
BadPixels_mask = np.broadcast_to(np.squeeze(constant_maps['BadPixelsDark'] != 0), data.shape)
data[BadPixels_mask] = np.nan
it = np.arange(0, CM_N_iterations+1)
plt.figure()
plt.plot(it, np.sum((bp_offset,bp_noise),axis=0), c = 'black', ls = '--', marker = 'o', label = 'Total')
plt.plot(it, bp_noise, c = 'red', ls = '--', marker = 'v', label = 'Noise out of threshold')
plt.plot(it, bp_offset, c = 'blue', ls = '--', marker = 's',label = 'Offset out of threshold')
plt.xticks(it)
plt.xlabel('CM correction iteration')
plt.ylabel('# Bad Pixels')
plt.legend()
plt.grid(linestyle = ':')
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
display(Markdown(f'## Common-Mode Corrected Bad Pixels Map\n'))
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
# Plot final bad pixels map
fig = xana.heatmapPlot(
np.exp(np.nan_to_num(np.log(constant_maps['BadPixelsDark']),neginf=np.nan)).squeeze(), # convert zeros to NaN
lut_label='Bad pixel value [ADU]', # for plotting purposes
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1])
)
fig.suptitle('Final Bad Pixels Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
print(bp_analysis_table(constant_maps['BadPixelsDark'], title='Final Bad Pixels Analysis'))
```
%% Cell type:markdown id: tags:
## Calibration Constants DB
Send the dark constants to the database and/or save them locally.
%% Cell type:code id: tags:
``` python
# Save constants to DB
md = None
for const_name in constant_maps.keys():
const = getattr(Constants.ePix100, const_name)()
const.data = constant_maps[const_name].data
# Set the operating condition
condition = Conditions.Dark.ePix100(
bias_voltage=bias_voltage,
integration_time=integration_time,
temperature=temperature_k,
in_vacuum=in_vacuum)
for parm in condition.parameters:
if parm.name == "Sensor Temperature":
parm.lower_deviation = temp_limits
parm.upper_deviation = temp_limits
# Get physical detector unit
db_module = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=const,
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)[0]
# Inject or save calibration constants
if db_output:
md = send_to_db(
db_module=db_module,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout
)
if local_output:
md = save_const_to_h5(
db_module=db_module,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder
)
print(f"Calibration constant {const_name} is stored locally at {out_folder} \n")
print("Constants parameter conditions are:\n"
f"• Bias voltage: {bias_voltage}\n"
f"• Integration time: {integration_time}\n"
f"• Temperature: {temperature_k}\n"
f"• In Vacuum: {in_vacuum}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
......
%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction of images acquired with the ePix100 detector.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw" # input folder, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/epix_correct" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 9988 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "MID_EXP_EPIX-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
# Only for testing
limit_images = 0 # ONLY FOR TESTING. process only first N images, 0 - process all.
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
fix_temperature = -1 # fixed temperature value in Kelvin, Default value -1 to use the value from files.
gain_photon_energy = 9.0 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, display
from extra_data import RunDirectory, H5File
from pathlib import Path
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import h5_copy_except
from cal_tools.epix100 import epix100lib
from cal_tools.tools import (
calcat_creation_time,
get_dir_creation_date,
get_constant_from_db,
load_specified_constants,
CalibrationMetadata,
)
from cal_tools.step_timing import StepTimer
from iCalibrationDB import (
Conditions,
Constants,
)
warnings.filterwarnings('ignore')
prettyPlotting = True
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100
if absolute_gain:
relative_gain = True
plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
run_folder = in_folder / f"r{run:04d}"
instrument_src = instrument_source_template.format(
karabo_id, receiver_template)
print(f"Correcting run: {run_folder}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Data corrected files are stored at: {out_folder}")
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml.
# NOTE: this notebook shouldn't overwrite calibration metadata file.
const_yaml = metadata.get("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
# If a specific set of sequences was requested,
# adapt the seq_files list accordingly.
if sequences != [-1]:
seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]
if not len(seq_files):
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
sensorSize = [x, y]
# Sensor area will be analysed according to blocksize
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1 # ePIX has no memory cells
run_parallel = False
# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dc,
instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ""
else:
integration_time_str_add = "(manual input)"
if fix_temperature < 0:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ""
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = "(manual input)"
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
md = display(Latex(tabulate.tabulate(
table,
tablefmt='latex',
headers=["#", "file"]
)))
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:
``` python
cond_dict = {
"bias_voltage": bias_voltage,
"integration_time": integration_time,
"temperature": temperature_k,
"in_vacuum": in_vacuum,
}
dark_condition = Conditions.Dark.ePix100(**cond_dict)
# Update conditions with the illuminated conditions.
cond_dict.update({
"photon_energy": gain_photon_energy
})
illum_condition = Conditions.Illuminated.ePix100(**cond_dict)
const_cond = {
"Offset": dark_condition,
"Noise": dark_condition,
"RelativeGain": illum_condition,
}
```
%% Cell type:code id: tags:
``` python
empty_constant = np.zeros((708, 768, 1), dtype=np.float32)
if const_yaml: # Used while reproducing corrected data.
print(f"Using stored constants in {metadata.filename}")
const_data, _ = load_specified_constants(const_yaml[karabo_da]["constants"])
for cname, cval in const_data.items():
if cval is None and cname != "RelativeGain":
const_data[cname] = empty_constant
else: # First correction attempt.
const_data = dict()
for cname, condition in const_cond.items():
# Avoid retrieving RelativeGain, if not needed for correction.
if cname == "RelativeGain" and not relative_gain:
const_data[cname] = None
else:
const_data[cname] = get_constant_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=getattr(Constants.ePix100, cname)(),
condition=condition,
empty_constant=None if cname == "RelativeGain" else empty_constant,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
print_once=2,
timeout=cal_db_timeout
)
```
%% Cell type:code id: tags:
``` python
if relative_gain and const_data.get("RelativeGain", None) is None:
print(
"WARNING: RelativeGain map is requested, but not found.\n"
"No gain correction will be applied"
)
relative_gain = False
absolute_gain = False
# Initializing some parameters.
hscale = 1
stats = True
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]
commonModeBlockSize = [x//2, y//2]
```
%% Cell type:code id: tags:
``` python
histCalOffsetCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
# *****************Histogram Calculators****************** #
histCalCor = xcal.HistogramCalculator(
sensorSize,
bins=1050,
range=[-50, 1000],
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if common_mode:
histCalCMCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize,
)
cmCorrectionB = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='block',
nCells=memoryCells,
noiseMap=const_data['Noise'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionR = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='row',
nCells=memoryCells,
noiseMap=const_data['Noise'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionC = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='col',
nCells=memoryCells,
noiseMap=const_data['Noise'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
```
%% Cell type:code id: tags:
``` python
if relative_gain:
gain_cnst = np.median(const_data["RelativeGain"])
hscale = gain_cnst
plot_unit = 'keV'
if photon_energy > 0:
plot_unit = '$\gamma$'
hscale /= photon_energy
gainCorrection = xcal.RelativeGainCorrection(
sensorSize,
gain_cnst/const_data["RelativeGain"][..., None],
nCells=memoryCells,
parallel=run_parallel,
blockSize=blockSize,
gains=None,
)
histCalRelGainCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
if absolute_gain:
histCalAbsGainCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if pattern_classification :
patternClassifier = xcal.PatternClassifier(
[x, y],
const_data["Noise"],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=0,
nCells=memoryCells,
allowElongated=False,
blockSize=[x, y],
parallel=run_parallel,
)
histCalSECor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize,
)
histCalGainCorSingles = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
d = d[pixel_data[0]][pixel_data[1]][..., np.newaxis].astype(np.float32)
d = np.compress(
np.any(d > 0, axis=(0, 1)), d, axis=2)
# Offset correction.
d -= const_data["Offset"]
histCalOffsetCor.fill(d)
# Common Mode correction.
if common_mode:
# Block CM
d = cmCorrectionB.correct(d)
# Row CM
d = cmCorrectionR.correct(d)
# COL CM
d = cmCorrectionC.correct(d)
histCalCMCor.fill(d)
# relative gain correction.
if relative_gain:
d = gainCorrection.correct(d)
histCalRelGainCor.fill(d)
data[index, ...] = np.squeeze(d)
"""The gain correction is currently applying
an absolute correction (not a relative correction
as implied by the name);
it changes the scale (the unit of measurement)
of the data from ADU to either keV or n_of_photons.
But the pattern classification relies on comparing
data with the noise map, which is still in ADU.
The best solution is to do a relative gain
correction first and apply the global absolute
gain to the data at the end, after clustering.
"""
if pattern_classification:
d_clu, patterns = patternClassifier.classify(d)
d_clu[d_clu < (split_evt_primary_threshold*const_data["Noise"])] = 0
data_patterns[index, ...] = np.squeeze(patterns)
data_clu[index, ...] = np.squeeze(d_clu)
d_clu[patterns != 100] = np.nan
histCalSECor.fill(d_clu)
# absolute gain correction
# changes data from ADU to keV (or n. of photons)
if absolute_gain:
d = d * gain_cnst
if photon_energy > 0:
d /= photon_energy
histCalAbsGainCor.fill(d)
if pattern_classification:
# Modify pattern classification.
d_clu = d_clu * gain_cnst
if photon_energy > 0:
d_clu /= photon_energy
histCalGainCorSingles.fill(d_clu)
data_clu[index, ...] = np.squeeze(d_clu)
histCalCor.fill(d)
```
%% Cell type:code id: tags:
``` python
pixel_data = (instrument_src, "data.image.pixels")
# 10 is a number chosen after testing 1 ... 71 parallel threads
context = psh.context.ThreadContext(num_workers=10)
```
%% Cell type:code id: tags:
``` python
for f in seq_files:
seq_dc = H5File(f)
n_imgs = seq_dc.get_data_counts(*pixel_data).shape[0]
# Data shape in seq_dc excluding trains with empty images.
dshape = seq_dc[pixel_data].shape
dataset_chunk = ((chunk_size_idim,) + dshape[1:]) # e.g. (1, pixels_x, pixels_y)
if n_imgs - dshape[0] != 0:
print(f"- WARNING: {f} has {n_imgs - dshape[0]} trains with empty data.")
# This parameter is only used for testing.
if limit_images > 0:
n_imgs = min(n_imgs, limit_images)
data = context.alloc(shape=dshape, dtype=np.float32)
if pattern_classification:
data_clu = context.alloc(shape=dshape, dtype=np.float32)
data_patterns = context.alloc(shape=dshape, dtype=np.int32)
step_timer.start()
context.map(
correct_train, seq_dc.select(
*pixel_data, require_all=True).select_trains(np.s_[:n_imgs])
)
step_timer.done_step(f'Correcting {n_imgs} trains.')
# Store detector h5 information in the corrected file
# and deselect data to correct and store later.
step_timer.start()
out_file = out_folder / f.name.replace("RAW", "CORR")
data_path = "INSTRUMENT/"+instrument_src+"/data/image"
pixels_path = f"{data_path}/pixels"
# First copy all raw data source to the corrected file,
# while excluding the raw data image /data/image/pixels.
with h5py.File(out_file, 'w') as ofile:
# Copy RAW non-calibrated sources.
with h5py.File(f, 'r') as sfile:
h5_copy_except.h5_copy_except_paths(
sfile, ofile,
[pixels_path])
# Create dataset in CORR h5 file and add corrected images.
dataset = ofile.create_dataset(
pixels_path,
data=data,
chunks=dataset_chunk,
dtype=np.float32)
if pattern_classification:
# Save /data/image//pixels_classified in corrected file.
datasetc = ofile.create_dataset(
f"{data_path}/pixels_classified",
data=data_clu,
chunks=dataset_chunk,
dtype=np.float32)
# Save /data/image//patterns in corrected file.
datasetp = ofile.create_dataset(
f"{data_path}/patterns",
data=data_patterns,
chunks=dataset_chunk,
dtype=np.int32)
step_timer.done_step('Storing data.')
```
%% Cell type:code id: tags:
``` python
ho, eo, co, so = histCalCor.get()
d = [{
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Total corr.'
}]
ho, eo, co, so = histCalOffsetCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset corr.'
})
if common_mode:
ho, eo, co, so = histCalCMCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'CM corr.'
})
if relative_gain :
ho, eo, co, so = histCalRelGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Relative gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalSECor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Isolated photons (singles)'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy (ADU)',
y_label='Number of occurrences', figsize='2col',
y_log=True, x_range=(-50, 500),
legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:
``` python
if absolute_gain :
d=[]
ho, eo, co, so = histCalAbsGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Absolute gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalGainCorSingles.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Isolated photons (singles)'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy ({plot_unit})',
y_label='Number of occurrences', figsize='2col',
y_log=True,
x_range=np.array((-50, 500))*hscale,
legend='top-center-frame-2col',
)
plt.grid()
plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
np.nanmedian(data, axis=0),
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
data[0, ...],
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
step_timer.done_step(f'Plotting single shot of corrected data.')
```
......
......@@ -24,6 +24,12 @@ ext_modules = [
'-ftree-vectorize', '-frename-registers'],
extra_link_args=['-fopenmp'],
),
Extension(
"cal_tools.jfalgs",
['src/cal_tools/jfalgs.pyx'],
extra_compile_args=['-O3', '-march=native', '-ftree-vectorize',
'-frename-registers']
),
Extension(
"cal_tools.gotthard2.gotthard2algs",
["src/cal_tools/gotthard2/gotthard2algs.pyx"],
......@@ -58,14 +64,14 @@ install_requires = [
"markupsafe==2.0.1",
"astcheck==0.2.5",
"astsearch==0.2.0",
"cfelpyutils==1.0.1",
"cfelpyutils==2.0.6",
"calibration_client==10.0.0",
"dill==0.3.0",
"docutils==0.17.1",
"dynaconf==3.1.4",
"env_cache==0.1",
"extra_data==1.12.0",
"extra_geom==1.6.0",
"extra_geom==1.8.0",
"gitpython==3.1.0",
"h5py==3.5.0",
"iminuit==1.3.8",
......
......@@ -2,6 +2,7 @@ import os
import posixpath
import zlib
from datetime import datetime
from multiprocessing import Manager
from multiprocessing.pool import ThreadPool
from typing import Any, Dict, List, Optional, Tuple
......@@ -399,7 +400,7 @@ class AgipdCorrections:
self.hg_hard_threshold = 100
self.noisy_adc_threshold = 0.25
self.ff_gain = 1
self.actual_photon_energy = 9.2
self.photon_energy = 9.2
# Output parameters
self.compress_fields = ['gain', 'mask']
......@@ -416,6 +417,14 @@ class AgipdCorrections:
self.mask = {}
self.xray_cor = {}
if corr_bools.get('round_photons'):
# Variables related to histogramming.
self.shared_hist_preround = None
self.shared_hist_postround = None
self.hist_bins_preround = np.linspace(-5.0, 60.0, 200)
self.hist_bins_postround = np.arange(-5.5, 60.1)
self.hist_lock = Manager().Lock()
# check if given corr_bools are correct
tot_corr_bools = ['only_offset', 'adjust_mg_baseline', 'rel_gain',
'xray_corr', 'blc_noise', 'blc_hmatch',
......@@ -909,7 +918,9 @@ class AgipdCorrections:
# Round keV-normalized intensity to photons.
if self.corr_bools.get("round_photons"):
data /= self.actual_photon_energy
data_hist_preround, _ = np.histogram(data, bins=self.hist_bins_preround)
data /= self.photon_energy
np.round(data, out=data)
# This could also be done before and its mask inverted for
......@@ -919,6 +930,13 @@ class AgipdCorrections:
msk[bidx] |= BadPixels.VALUE_OUT_OF_RANGE
del bidx
data_hist_postround, _ = np.histogram(data * self.photon_energy,
bins=self.hist_bins_postround)
with self.hist_lock:
self.shared_hist_preround += data_hist_preround
self.shared_hist_postround += data_hist_postround
if np.issubdtype(self.recast_image_fields.get('image'), np.integer):
# If the image data is meant to be recast to an integer
# type, make sure its values are within its bounds.
......@@ -1497,6 +1515,10 @@ class AgipdCorrections:
self.shared_dict[i]["n_valid_trains"] = sharedmem.empty(1, dtype="i4") # noqa
self.shared_dict[i]["valid_trains"] = sharedmem.empty(1024, dtype="u8") # noqa
if self.corr_bools.get("round_photons"):
self.shared_hist_preround = sharedmem.empty(len(self.hist_bins_preround) - 1, dtype="i8")
self.shared_hist_postround = sharedmem.empty(len(self.hist_bins_postround) - 1, dtype="i8")
def validate_selected_pulses(
max_pulses: List[int], max_cells: int
......
......@@ -29,6 +29,7 @@ class BadPixels(IntFlag):
WRONG_GAIN_VALUE = 1 << 21 # bit 22
NON_STANDARD_SIZE = 1 << 22 # bit 23
class SnowResolution(Enum):
""" An Enum specifying how to resolve snowy pixels
"""
......@@ -46,10 +47,16 @@ class AgipdGainMode(IntEnum):
class JungfrauSettings(Enum):
""" Jungfrau run gain settings."""
DYNAMIC_GAIN = "dynamicgain"
DYNAMIC_GAIN_HG0 = "dynamichg0"
FIX_GAIN_1 = "fixgain1"
FIX_GAIN_2 = "fixgain2"
"""Jungfrau run gain settings."""
# old setting, new setting, new mode
GAIN_0 = "gain0"
HIGH_GAIN_0 = "highgain0"
class JungfrauGainMode(Enum):
"""Jungfrau run gain mode."""
DYNAMIC = "dynamic"
FIX_GAIN_1 = "fixg1"
FIX_GAIN_2 = "fixg2"
FORCE_SWITCH_HG1 = "forceswitchg1"
FORCE_SWITCH_HG2 = "forceswitchg2"
......@@ -22,5 +22,13 @@ class epix100Ctrl():
self.ctrl_src, 'expTime.value'].as_single_value(reduce_by='first')
def get_temprature(self):
"""Get temperature value from CONTROL.
Temperature is stored in units of Celsius/100.
Therefore, the value is divided by 100 and read with
an absolute tolerance of 100, i.e. atol=100 corresponds
to a 1 degree variation tolerance.
"""
# data.backTemp shape evolved from (n_trains,) to (n_trains, 1)
return self.run_dc[
self.instrument_src, 'data.backTemp'].ndarray().mean() / 100
self.instrument_src, 'data.backTemp'].as_single_value(
reduce_by='mean', atol=100).item() / 100
......@@ -318,7 +318,6 @@ class DataFile(h5py.File):
for channel in self[f'INSTRUMENT/{name}']})
source_names = sorted(sources.keys())
data_sources_shape = (len(sources),)
md_group.create_dataset('dataSources/dataSourceId',
shape=data_sources_shape,
......@@ -445,7 +444,7 @@ class ControlSource(h5py.Group):
self.__run_group.create_dataset(
f'{key}/value', data=value, shape=shape, dtype=dtype)
self.__run_group.create_dataset(
f'{key}/timestamp', data=timestamp, shape=shape, dtype=np.uint64)
f'{key}/timestamp', data=timestamp, shape=(1,), dtype=np.uint64)
def create_index(self, num_trains):
"""Create source-specific INDEX datasets.
......
from cython cimport boundscheck, wraparound, cdivision
from cython.view cimport contiguous
import numpy as np
ctypedef fused jf_data_t:
unsigned short # raw pixel data
float # corrected pixel data
unsigned int # mask data
unsigned char # gain data
DEF STRIXEL_Y = 86
DEF STRIXEL_X = 1024 * 3 + 18
def strixel_shape():
return STRIXEL_Y, STRIXEL_X
def strixel_double_pixels():
"""Build index arrays for double-size pixels.
In raw data, the entire columns 255, 256, 511, 512, 767 and 768
are double-size pixels. After strixelation, these end up in columns
765-776, 1539-1550 and 2313-2324 on rows 0-85 or 0-83, with a set
of four columns with 86 rows followed by a set of 84 and 86 again.
This function builds the index arrays after strixelation.
"""
Ydouble = []
Xdouble = []
for double_col in [765, 1539, 2313]:
for col in range(double_col, double_col+12):
for row in range(84 if ((col-double_col) // 4) == 1 else 86):
Ydouble.append(row)
Xdouble.append(col)
return np.array(Ydouble), np.array(Xdouble)
@boundscheck(False)
@wraparound(False)
@cdivision(True)
def strixel_transform(jf_data_t[:, :, ::contiguous] data,
jf_data_t[:, :, ::contiguous] out = None):
"""Reorder raw data to physical strixel sensor layout. """
if data.shape[1] < 256 or data.shape[2] < 256:
raise ValueError('Pixel shape of data may not be below (256, 256)')
if out is None:
import numpy as np
out = np.zeros((data.shape[0], STRIXEL_Y, STRIXEL_X), dtype=np.float32)
elif data.shape[0] > out.shape[0]:
raise ValueError('Cell shape of data exceeds out')
elif out.shape[1] < STRIXEL_Y or out.shape[2] < STRIXEL_X:
raise ValueError(f'Pixel shape of out may not be below '
f'({STRIXEL_Y}, {STRIXEL_X})')
cdef int cell, yin, xin, xout, yout, igap
for cell in range(data.shape[0]):
# Normal pixels.
for yin in range(256):
yout = yin // 3
for xin in range(1024) :
xout = 774 * (xin // 256) + 3 * (xin % 256) + yin % 3
out[cell, yout, xout] = data[cell, yin, xin]
# Gap pixels.
for yin in range(256):
yout = 2 * (yin // 6)
for igap in range(3) :
# Left side of the gap.
xin = igap * 256 + 255
xout = igap * 774 + 765 + yin % 6
out[cell, yout, xout] = data[cell, yin, xin]
out[cell, yout+1, xout] = data[cell, yin, xin]
# Right side of the gap.
xin = igap * 256 + 255 + 1
xout = igap * 774 + 765 + 11 - yin % 6
out[cell, yout, xout] = data[cell, yin, xin]
out[cell, yout+1, xout] = data[cell, yin, xin]
return out
from typing import Tuple
from typing import Optional, Tuple
from cal_tools.enums import JungfrauSettings
import extra_data
from cal_tools.enums import JungfrauGainMode, JungfrauSettings
def _get_settings(run_dc, ctrl_src) -> str:
try:
return(JungfrauSettings(
run_dc.get_run_value(ctrl_src, "settings.value")))
except KeyError:
print(
"WARNING: \'settings.value\' key is not available at "
f"{run_dc.select(ctrl_src).files[0].filename},\n")
return
def _old_settings_to_new(settings: str, index: int) -> str:
"""At the end of 2022, there is a sls update.
- `settings.value` values was updated from 6 possible values to only 2.
- `gainMode.value` dataset was created with 5 possible values.
https://git.xfel.eu/karaboDevices/slsDetectors/-/commit/4433ae9c00edcca3309bec8b7515e0938f5f502c # noqa
This method is used to get the settings or mode that
corresponds to old settings.value
"""
settings_lookup = {
# old setting: new setting, new mode
"dynamicgain": ("gain0", "dynamic"),
"dynamichg0": ("highgain0", "dynamic"),
"fixgain1": ("gain0", "fixg1"),
"fixgain2": ("gain0", "fixg2"),
"forceswitchg1": ("gain0", "forceswitchg1"),
"forceswitchg2": ("gain0", "forceswitchg2"),
}
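# For example (illustrative): settings_lookup["dynamichg0"][0] -> "highgain0",
# settings_lookup["fixgain1"][1] -> "fixg1".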
return settings_lookup[settings][index]
class JungfrauCtrl():
def __init__(
self,
run_dc: "extra_data.DataCollection", # noqa
run_dc: extra_data.DataCollection,
ctrl_src: str,
):
"""Read slow data from RUN source.
:param run_dir: EXtra-data RunDirectory DataCollection object.
:param ctrl_src: Control source name for accessing run slow data.
"""
self.run_dc = run_dc
self.ctrl_src = ctrl_src
self.run_settings = _get_settings(run_dc, ctrl_src)
# run settings and mode are read as raw settings
self.run_settings = self._get_settings()
self.run_mode = self._get_mode()
def _get_settings(self) -> str:
"""Get JUNGFRAU run settings and mode."""
try:
return self.run_dc.get_run_value(self.ctrl_src, "settings")
except extra_data.PropertyNameError:
print("\'settings.value\' key "
"is not available for this run. "
"`run_settings` is set to `dynamicgain`")
return "dynamicgain" # old data are set to `dynamicgain`.
def _get_mode(self) -> str:
"""Get run mode from `gainMode` dataset."""
try:
return self.run_dc.get_run_value(self.ctrl_src, "gainMode")
except extra_data.PropertyNameError:
return self.run_settings
def get_memory_cells(self) -> Tuple[int, int]:
n_storage_cells = int(self.run_dc.get_run_value(
......@@ -37,26 +68,52 @@ class JungfrauCtrl():
return n_storage_cells, storage_cell_st
def get_bias_voltage(self) -> int:
return(int(self.run_dc.get_run_value(
self.ctrl_src, "vHighVoltage.value")[0]))
"""Get Bias voltage value from RUN source."""
if "highVoltage" in self.run_dc[self.ctrl_src]:
return int(
self.run_dc.get_run_value(self.ctrl_src, "highVoltage")[0])
else: # Old dataset "vHighVoltage" before end of 2022.
return int(
self.run_dc.get_run_value(self.ctrl_src, "vHighVoltage")[0])
def get_integration_time(self) -> float:
return(float(self.run_dc.get_run_value(
self.ctrl_src, "exposureTime.value")) * 1e6)
def get_gain_setting(self) -> int:
if self.run_settings == JungfrauSettings.DYNAMIC_GAIN_HG0:
gain_setting = 1
else: # JungfrauSettings.DYNAMIC_GAIN
gain_setting = 0
if self.run_settings != JungfrauSettings.DYNAMIC_GAIN:
print(
"WARNING: Setting gain_setting to 0, "
"assuming that this is an old run.\n")
return gain_setting
"""Get run gain settings to identify if run is in
High CDS or Low CDS.
- `1` if run_settings = highgain0.
- `0` if run_settings = gain0 or None.
"""
# Check if `run_settings` is of an old settings value
# to convert into new settings value.
if self.run_settings in [s.value for s in JungfrauSettings]:
settings = self.run_settings
else:
settings = _old_settings_to_new(self.run_settings, 0)
def get_gain_mode(self) -> int:
if self.run_settings in [JungfrauSettings.FIX_GAIN_1, JungfrauSettings.FIX_GAIN_2]: # noqa
if settings == JungfrauSettings.HIGH_GAIN_0.value:
return 1
else: # JungfrauSettings.GAIN_0
return 0
def get_gain_mode(self) -> int:
"""Get gain mode value. Fixed `1` or Adaptive `1`.
- `0` if run_mode = dynamic, forceswitchg1, forceswitchg2, or None.
- `1` if run_mode = fixg1 or fixg2.
"""
# Check if run_mode is of an old settings to convert
# into new mode value.
if self.run_mode in [m.value for m in JungfrauGainMode]:
mode = self.run_mode
else:
mode = _old_settings_to_new(self.run_mode, 1)
if mode in [
JungfrauGainMode.FIX_GAIN_1.value,
JungfrauGainMode.FIX_GAIN_2.value,
]:
return 1
else: # DYNAMIC, FORCE_SWITCH_G1, or FORCE_SWITCH_G2
return 0
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional, Tuple
import matplotlib.pyplot as plt
import numpy as np
......@@ -8,6 +8,8 @@ from extra_geom import (
AGIPD_500K2GGeometry,
DSSC_1MGeometry,
LPD_1MGeometry,
JUNGFRAUGeometry,
)
from extra_geom import tests as eg_tests
from matplotlib import colors
......@@ -338,3 +340,121 @@ def show_processed_modules(dinstance: str, constants: Optional[Dict[str, Any]],
Patch(facecolor='green', label='Processed')],
loc='best', ncol=3, bbox_to_anchor=(0.1, 0.25, 0.7, 0.8))
plt.show()
def show_processed_modules_jungfrau(
jungfrau_geom: JUNGFRAUGeometry,
constants: Optional[Dict[str, Any]],
processed_modules: List[str],
expected_modules: Optional[List[str]]=None,
display_module_names: Optional[List[str]] = None,
):
""" Show the status of the processed modules.
`Green`: Processed. `Gray`: Not Processed. `Red`: No data available.
:param geom: JUNGFRAUGeometry object to change the colors of
it based on the processed modules.
:param constants: A dict of the plotted constants data.
{"const_name":constant_data}. Can be None in case of position mode.
:param processed_modules: A list of processed module names.
:param expected_modules: A list of full module names expected
for this detector.
:param display_module_names: A list of the module names to display.
:return
"""
# Estimate expected modules from number of modules in extra_geom object.
# This will work fine as long as the related modules start from JNGFR01
# and increment sequentially for the number of modules available.
if expected_modules is None:
expected_modules = [
    f"JNGFR{i:02d}" for i in range(1, len(jungfrau_geom.modules) + 1)]
# Create the figure
ax = jungfrau_geom.inspect(
module_names=[
f'{mod}\n{pdu}' for mod, pdu in zip(
processed_modules, display_module_names)])
ax.set_title('') # Cannot remove title
ax.set_axis_off()
ax.get_legend().set_visible(False)
# Set each module colour individually,
# extra_geom provides a single color for all modules.
modules, = ax.collections = ax.collections[:1]
facecolors = np.repeat(modules.get_facecolor(), len(expected_modules), axis=0)
counter = 0 # Used as index within the `Noise` matrix
for idx, module in enumerate(expected_modules):
color = 'grey' # Unprocessed modules are grey
if module in processed_modules:
color = 'green'
if (
'Noise' not in constants.keys() or
np.nanmean(constants['Noise'][counter, ..., 0]) == 0
):
color = 'red'
counter += 1
# Set the colours
facecolors[idx] = colors.to_rgba(color)
modules.set_facecolors(facecolors) # Update colours in figure
ax.legend(
handles=[
Patch(facecolor='red', label='No data'),
Patch(facecolor='gray', label='Not processed'),
Patch(facecolor='green', label='Processed'),],
loc='best', ncol=3,
bbox_to_anchor=(0.1, 0.25, 0.7, 0.8))
plt.show()
# Prepare the extra_geom object.
def init_jungfrau_geom(
karabo_id: str,
karabo_da: List[str]
) -> Tuple[List[str], JUNGFRAUGeometry]:
""" Initiate JUNGFRAUGeometry object based on the selected detector
(SPB_IRDA_JF4M, FXE_XAD_JF1M, or a single module detector).
:param karabo_id: the detector identifier of an expected multimodular
detector or a single module detector.
:param karabo_da: the data aggregator names that defines the module name.
This is more relevant for the single module detector to display the
correct module name.
:return: List of expected module names, JUNGFRAUGeometry object.
"""
# Positions are given in pixels
mod_width = (256 * 4) + (2 * 3) # inc. 2px gaps between tiles
mod_height = (256 * 2) + 2
if karabo_id == "SPB_IRDA_JF4M":
nmods = 8
expected_modules = [f"JNGFR{i:02d}" for i in range(1, nmods+1)]
# The first 4 modules are rotated 180 degrees relative to the others.
# We pass the bottom, beam-right corner of the module regardless
# of its orientation, requiring a subtraction from the
# symmetric positions we'd otherwise calculate.
x_start, y_start = 1125, 1078
module_pos = [
(x_start - mod_width, y_start - mod_height - (i*(mod_height+33)))
for i in range(4)
] + [
(-x_start, -y_start + (i*(mod_height + 33))) for i in range(4)
]
orientations = [
(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif karabo_id == "FXE_XAD_JF1M":
nmods = 2
expected_modules = [f"JNGFR{i:02d}" for i in range(1, nmods+1)]
module_pos = ((-mod_width//2, 33), (-mod_width//2, -mod_height-33))
orientations = [(-1,-1), (1,1)]
else:
nmods = 1
expected_modules = karabo_da
module_pos = ((-mod_width//2, -mod_height//2),)
orientations = None
return expected_modules, JUNGFRAUGeometry.from_module_positions(
module_pos, orientations=orientations, asic_gap=6)
......@@ -27,7 +27,10 @@ class StepTimer:
def timespan(self):
"""Wall time from 1st call to start() to last start() or done_step()"""
return self.t_latest - self.t0
if self.t_latest is not None and self.t0 is not None:
return self.t_latest - self.t0
else:
return 0
def print_summary(self):
"""Show mean & std for each step"""
......
......@@ -172,6 +172,8 @@ notebooks = {
"DARK": {
"notebook":
"notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb", # noqa
"dep_notebooks": [
"notebooks/Jungfrau/Jungfrau_darks_Summary_NBC.ipynb"],
"concurrency": {"parameter": "karabo_da",
"default concurrency": list(range(8)),
"cluster cores": 4},
......
import pytest
from extra_data import RunDirectory
from cal_tools.jungfraulib import JungfrauCtrl
# TODO: replace with mocked RAW data as in tests/test_agipdlib.py
JF = JungfrauCtrl(
run_dc=RunDirectory("/gpfs/exfel/exp/CALLAB/202130/p900203/raw/r9031/"),
ctrl_src="FXE_XAD_JF1M/DET/CONTROL",
)
@pytest.mark.parametrize(
'settings,result',
[
('dynamicgain', 0),
('dynamichg0', 1),
('fixgain1', 0),
('fixgain2', 0),
('forceswitchg1', 0),
('forceswitchg2', 0),
('gain0', 0),
('highgain0', 1),
],
)
def test_get_gain_setting(settings, result):
JF.run_settings = settings
assert JF.get_gain_setting() == result
@pytest.mark.parametrize(
'mode,result',
[
('dynamicgain', 0),
('dynamichg0', 0),
('fixgain1', 1),
('fixgain2', 1),
('forceswitchg1', 0),
('forceswitchg2', 0),
('dynamic', 0),
('fixg1', 1),
('fixg2', 1),
],
)
def test_get_gain_mode(mode, result):
JF.run_mode = mode
assert JF.get_gain_mode() == result