%% Cell type:markdown id: tags:
# AGIPD Characterize Dark Images #
Author: European XFEL Detector Group, Version: 2.0
The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and gain-switching thresholds. All four types of constants are evaluated per pixel and per memory cell. Data for the detector's three gain stages must be present, recorded in separate runs.
The evaluated calibration constants are stored locally and injected into the calibration database.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
out_folder = "" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 9985 # run number in which high gain data was recorded, required
run_med = 9984 # run number in which medium gain data was recorded, required
run_low = 9983 # run number in which low gain data was recorded, required
operation_mode = "ADAPTIVE_GAIN" # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")
karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode, use -1 to use value stored in slow data.
integration_time = -1 # integration time, negative values for auto-detection.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
thresholds_offset_hard = [0, 0] # For setting the same offset thresholds for the 3 gains. Left for backwards compatibility. Default [0, 0] to take the following parameters.
thresholds_offset_hard_hg = [3000, 7000] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_mg = [6000, 10000] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_lg = [6000, 10000] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_hg_fixed = [3500, 6500] # Same as thresholds_offset_hard_hg, but for fixed gain operation
thresholds_offset_hard_mg_fixed = [3500, 6500] # Same as thresholds_offset_hard_mg, but for fixed gain operation
thresholds_offset_hard_lg_fixed = [3500, 6500] # Same as thresholds_offset_hard_lg, but for fixed gain operation
thresholds_noise_sigma = 5. # noise sigma thresholds for noise-deduced bad pixels
thresholds_noise_hard = [0, 0] # For setting the same noise thresholds for the 3 gains. Left for backwards compatibility. Default [0, 0] to take the following parameters.
thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for noise-deduced bad pixels
thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for noise-deduced bad pixels
thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for noise-deduced bad pixels
thresholds_gain_sigma = 5. # Gain separation sigma threshold
max_trains = 550 # Maximum number of trains to use for processing darks. Set to 0 to process all available trains. 550 chosen for ~500 GB nodes to temporarily avoid memory issues.
min_trains = 1 # Minimum number of trains required for processing darks. If the run folder has fewer trains, processing is stopped.
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells
# This is used if modules is not specified:
def find_modules(in_folder, run_high, modules):
if (modules is not None) and modules != [-1]:
return modules
from pathlib import Path
import re
modules = set()
for file in Path(in_folder, f'r{run_high:04d}').iterdir():
m = re.search(r'-AGIPD(\d{2})-', file.name)
if m:
modules.add(int(m.group(1)))
return sorted(modules)
```
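%% Cell type:markdown id: tags:
When `modules` is left at `[-1]`, the `find_modules` helper above infers the module indices from the RAW file names of the high-gain run. A minimal illustration with made-up file names (the real files follow the same `-AGIPD<NN>-` pattern):
%% Cell type:code id: tags:
``` python
import re

# Hypothetical file names, for illustration only.
example_names = [
    "RAW-R9985-AGIPD00-S00000.h5",
    "RAW-R9985-AGIPD00-S00001.h5",
    "RAW-R9985-AGIPD07-S00000.h5",
]
found = set()
for name in example_names:
    m = re.search(r'-AGIPD(\d{2})-', name)
    if m:
        found.add(int(m.group(1)))
print(sorted(found))  # [0, 7]
```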
%% Cell type:code id: tags:
``` python
import itertools
import multiprocessing
import os
from collections import OrderedDict
from datetime import timedelta
from pathlib import Path
from typing import List, Tuple
import dateutil.parser
import matplotlib
import numpy as np
import pasha as psh
import tabulate
import yaml
from IPython.display import Latex, Markdown, display
from extra_data import RunDirectory
matplotlib.use('agg')
import iCalibrationDB
import matplotlib.pyplot as plt
from cal_tools.agipdlib import AgipdCtrl
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# insert control device if format string (does nothing otherwise)
ctrl_src = ctrl_source_template.format(karabo_id_control)
runs_dict = OrderedDict()
run_numbers = [run_high, run_med, run_low]
for gain_idx, (run_name, run_number) in enumerate(zip(["high", "med", "low"], run_numbers)):
runs_dict[run_name] = {
"number": run_number,
"gain": gain_idx,
"dc": RunDirectory(f'{in_folder}/r{run_number:04d}/')
}
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
# Read report path and create file location tuple to add with the injection
file_loc = f"proposal:{prop} runs:{run_low} {run_med} {run_high}"
report = get_report(metadata_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
def create_karabo_da_list(modules):
return(["AGIPD{:02d}".format(i) for i in modules])
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = create_karabo_da_list(modules)
else:
modules = [int(x[-2:]) for x in karabo_da]
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Create out_folder if it doesn't exist.
Path(out_folder).mkdir(parents=True, exist_ok=True)
mod_image_size = []
for run_dict in runs_dict.values():
missing_modules = [] # modules with no images within a run.
n_trains_list = [] # list of the number of trains for each module within a run.
    # This is important in case no slurm parallelization over modules is done
    # (e.g. when running the notebook interactively).
for m in modules:
# validate that there are trains for the selected modules and run.
dc = run_dict["dc"].select(
instrument_src.format(m), "*", require_all=True)
n_trains = len(dc.train_ids)
if n_trains == 0:
print(f"WARNING: No images for module AGIPD{m:02d}, run {run_dict['number']}.")
missing_modules.append(m)
# Raise a warning if the module has less trains than expected.
        elif n_trains < min_trains:
            print(f"WARNING: AGIPD{m:02d}, run {run_dict['number']} "
                  f"has fewer trains than the minimum required: {min_trains}.")
        else:
            print(f"Processing {max_trains if max_trains < n_trains else n_trains} "
                  f"trains for AGIPD{m:02d}, run {run_dict['number']}.")
n_trains_list.append(n_trains)
mod_image_size.append(np.product(dc[instrument_src.format(m), "image.data"].shape) * 2 / 1e9)
if max(n_trains_list) == 0:
raise ValueError(f"No images to process for run: {run_dict['number']}")
    elif max(n_trains_list) < min_trains:
        raise ValueError(f"Run {run_dict['number']} has fewer than the minimum number of trains: {min_trains}.")
    # Update the modules and karabo_da lists based on the available modules to process.
modules = [m for m in modules if m not in missing_modules]
karabo_da = create_karabo_da_list(modules)
print(f"Will process data of ({sum(mod_image_size):.02f} GB).")
```
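%% Cell type:markdown id: tags:
The size printed above assumes 2 bytes per raw pixel value. A standalone sketch of the same arithmetic, using a made-up data shape:
%% Cell type:code id: tags:
``` python
import numpy as np

# Hypothetical image.data shape: (frames, data/gain dimension, slow scan, fast scan).
example_shape = (176000, 2, 512, 128)
size_gb = np.prod(example_shape) * 2 / 1e9  # 2 bytes per raw (uint16) value
print(f"{size_gb:.02f} GB")  # ~46.14 GB
```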
%% Cell type:markdown id: tags:
## Read and validate the runs' control data.
%% Cell type:code id: tags:
``` python
def read_run_conditions(runs_dict: dict):
agipd_cond = AgipdCtrl(
run_dc=runs_dict["dc"],
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
)
cond_dict["runs"].append(runs_dict["number"])
if acq_rate == 0:
cond_dict["acq_rate"].append(agipd_cond.get_acq_rate())
if mem_cells == 0:
cond_dict["mem_cells"].append(agipd_cond.get_num_cells())
if gain_setting == -1:
cond_dict["gain_setting"].append(
agipd_cond.get_gain_setting(creation_time))
if bias_voltage == 0.:
cond_dict["bias_voltage"].append(
agipd_cond.get_bias_voltage(karabo_id_control))
if integration_time == -1:
cond_dict["integration_time"].append(
agipd_cond.get_integration_time())
if gain_mode == -1:
cond_dict["gain_mode"].append(agipd_cond.get_gain_mode())
else:
cond_dict["gain_mode"].append(AgipdGainMode(gain_mode))
```
%% Cell type:code id: tags:
``` python
def validate_gain_modes(gain_modes: List[AgipdGainMode]):
# Validate that gain modes are not a mix of adaptive and fixed gain.
if all(
gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
):
fixed_gain_mode = False
# Some runs are adaptive by mistake.
elif any(
gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
):
raise ValueError(
f"ERROR: Given runs {run_numbers}"
" have a mix of ADAPTIVE and FIXED gain modes: "
f"{gain_modes}."
)
elif list(gain_modes) == [
AgipdGainMode.FIXED_HIGH_GAIN,
AgipdGainMode.FIXED_MEDIUM_GAIN,
AgipdGainMode.FIXED_LOW_GAIN
]:
fixed_gain_mode = True
else:
raise ValueError(
"ERROR: Wrong arrangment of given dark runs. "
f"Given runs' gain_modes are {gain_modes} for runs: {run_numbers}."
)
return fixed_gain_mode
```
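%% Cell type:markdown id: tags:
A short usage sketch of `validate_gain_modes`, covering the three cases it distinguishes:
%% Cell type:code id: tags:
``` python
from cal_tools.enums import AgipdGainMode

# All three dark runs taken in adaptive gain -> adaptive-gain constants.
assert validate_gain_modes([AgipdGainMode.ADAPTIVE_GAIN] * 3) is False

# Fixed high/medium/low gain runs, in exactly that order -> fixed-gain constants.
assert validate_gain_modes([
    AgipdGainMode.FIXED_HIGH_GAIN,
    AgipdGainMode.FIXED_MEDIUM_GAIN,
    AgipdGainMode.FIXED_LOW_GAIN,
]) is True

# Any other combination (e.g. a mix of adaptive and fixed runs) raises a ValueError.
```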
%% Cell type:code id: tags:
``` python
# Read slow data from 1st channel only.
# Read all modules in one notebook and validate the conditions across detectors?
# Currently slurm jobs run per one module.
# TODO: what if first module is not available. Maybe only channel 2 available
instrument_src_mod = instrument_src.format(modules[0])
cond_dict = dict()
fixed_gain_mode = None
with multiprocessing.Manager() as manager:
cond_dict["runs"] = manager.list()
cond_dict["acq_rate"] = manager.list()
cond_dict["mem_cells"] = manager.list()
cond_dict["gain_setting"] = manager.list()
cond_dict["gain_mode"] = manager.list()
cond_dict["bias_voltage"] = manager.list()
cond_dict["integration_time"] = manager.list()
with multiprocessing.Pool(processes=len(modules)) as pool:
pool.starmap(read_run_conditions, zip(runs_dict.values()))
for cond, vlist in cond_dict.items():
if cond == "runs":
continue
elif cond == "gain_mode":
fixed_gain_mode = validate_gain_modes(cond_dict["gain_mode"])
elif not all(x == vlist[0] for x in vlist):
# TODO: raise ERROR??
print(
f"WARNING: {cond} is not the same for the runs "
f"{cond_dict['runs']} with values"
f" of {cond_dict[cond]}, respectively."
)
if cond_dict["acq_rate"]: acq_rate = cond_dict["acq_rate"][0]
if cond_dict["mem_cells"]: mem_cells = cond_dict["mem_cells"][0]
if cond_dict["gain_setting"]: gain_setting = cond_dict["gain_setting"][0]
if cond_dict["gain_mode"]: gain_mode = list(cond_dict["gain_mode"])
if cond_dict["bias_voltage"]: bias_voltage = cond_dict["bias_voltage"][0]
if cond_dict["integration_time"]: integration_time = cond_dict["integration_time"][0]
```
%% Cell type:code id: tags:
``` python
# Determine the gain operation mode based on the gain_mode stored in control h5file.
if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")
if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
print(
"WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but"
"slow data indicates FIXED_GAIN. Processing fixed gain constants.")
elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
print(
"WARNING: Operation_mode parameter is FIXED_GAIN, "
"slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
```
%% Cell type:code id: tags:
``` python
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Acquisition rate: {acq_rate}")
print(f"Memory cells: {mem_cells}")
print(f"Runs: {run_numbers}")
print(f"Interlaced mode: {interlaced}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
print(f"Gain setting: {gain_setting}")
print(f"Integration time: {integration_time}")
print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
```
%% Cell type:code id: tags:
``` python
if thresholds_offset_hard != [0, 0]:
# if set, this will override the individual parameters
thresholds_offset_hard = [thresholds_offset_hard] * 3
elif fixed_gain_mode:
thresholds_offset_hard = [
thresholds_offset_hard_hg_fixed,
thresholds_offset_hard_mg_fixed,
thresholds_offset_hard_lg_fixed,
]
else:
thresholds_offset_hard = [
thresholds_offset_hard_hg,
thresholds_offset_hard_mg,
thresholds_offset_hard_lg,
]
print("Will use the following hard offset thresholds")
for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
print(f"- {name} gain: {value}")
if thresholds_noise_hard != [0, 0]:
thresholds_noise_hard = [thresholds_noise_hard] * 3
else:
thresholds_noise_hard = [
thresholds_noise_hard_hg,
thresholds_noise_hard_mg,
thresholds_noise_hard_lg,
]
```
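%% Cell type:markdown id: tags:
These [low, high] pairs are later applied per gain stage: any pixel whose offset (or noise) falls outside the corresponding window is flagged as bad. A tiny sketch of that test:
%% Cell type:code id: tags:
``` python
import numpy as np

offset = np.array([2500., 5000., 9000.])  # hypothetical per-pixel offsets (ADU)
low, high = 3000, 7000                     # e.g. thresholds_offset_hard_hg
flagged = (offset < low) | (offset > high)
print(flagged)  # [ True False  True]
```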
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per pixel and per memory cell. Offsets are the median value of the dark data taken at a given gain stage, the noise is the corresponding standard deviation, and the gain-bit values are the medians of the gain array.
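A self-contained sketch of this per-cell reduction, with random numbers standing in for the dark frames of a single memory cell:
%% Cell type:code id: tags:
``` python
import numpy as np

rng = np.random.default_rng(42)
# Toy dark data for one memory cell: (slow scan, fast scan, frames).
dark_frames = rng.normal(loc=5000, scale=10, size=(128, 512, 100))

offset = np.median(dark_frames, axis=2)  # per-pixel offset of this cell
noise = np.std(dark_frames, axis=2)      # per-pixel noise of this cell
print(offset.shape, noise.shape)         # (128, 512) (128, 512)
```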
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")
def characterize_module(
channel: int, runs_dict: dict,
) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:
# Select the corresponding module channel.
instrument_src_mod = instrument_src.format(channel)
run_dc = runs_dict["dc"].select(instrument_src_mod, require_all=True)
if max_trains != 0:
run_dc = run_dc.select_trains(np.s_[:max_trains])
gain_index = runs_dict["gain"]
# Read module's image and cellId data.
im = run_dc[instrument_src_mod, "image.data"].ndarray()
cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())
local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
local_thresholds_noise_hard = thresholds_noise_hard[gain_index]
if interlaced:
if not fixed_gain_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
cell_ids = cell_ids[::2]
else:
if not fixed_gain_mode:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.transpose(im)
if not fixed_gain_mode:
ga = np.transpose(ga)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
noise = context.alloc(like=offset)
if fixed_gain_mode:
gains = None
gains_std = None
else:
gains = context.alloc(like=offset)
gains_std = context.alloc(like=offset)
def process_cell(worker_id, array_index, cell_number):
cell_slice_index = (cell_ids == cell_number)
im_slice = im[..., cell_slice_index]
offset[..., cell_number] = np.median(im_slice, axis=2)
noise[..., cell_number] = np.std(im_slice, axis=2)
if not fixed_gain_mode:
ga_slice = ga[..., cell_slice_index]
gains[..., cell_number] = np.median(ga_slice, axis=2)
gains_std[..., cell_number] = np.std(ga_slice, axis=2)
    unique_cell_ids = np.unique(cell_ids)
    # Cell IDs are assumed to start at 0, so valid values are 0..mem_cells-1.
    if np.any(unique_cell_ids >= mem_cells):
        raise ValueError(
            f"Invalid cells found {unique_cell_ids} "
            f"for run: {run_dc.run_metadata()['runNumber']}.")
    context.map(process_cell, unique_cell_ids)
# bad pixels
bp = np.zeros_like(offset, dtype=np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[(offset < local_thresholds_offset_hard[0]) |
(offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
return channel, gain_index, offset, noise, gains, gains_std, bp
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(
characterize_module, itertools.product(modules, list(runs_dict.values())))
# mapped values for processing 2 modules example:
# [
# 0, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
# 0, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
# 0, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
# 1, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
# 1, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
# 1, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
# ]
```
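%% Cell type:markdown id: tags:
The argument ordering sketched in the comment above follows directly from `itertools.product`, which varies the runs fastest:
%% Cell type:code id: tags:
``` python
import itertools

# Two hypothetical module indices combined with the three gain runs.
for module, run_name in itertools.product([0, 1], ["high", "med", "low"]):
    print(module, run_name)
# 0 high, 0 med, 0 low, 1 high, 1 med, 1 low
```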
%% Cell type:code id: tags:
``` python
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
if not fixed_gain_mode:
gain_g = OrderedDict()
gainstd_g = OrderedDict()
for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
qm = module_index_to_qm(module_index)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
if not fixed_gain_mode:
gain_g[qm] = np.zeros_like(offset_g[qm])
gainstd_g[qm] = np.zeros_like(offset_g[qm])
offset_g[qm][..., gain_index] = offset
noise_g[qm][..., gain_index] = noise
badpix_g[qm][..., gain_index] = bp
if not fixed_gain_mode:
gain_g[qm][..., gain_index] = gains
gainstd_g[qm][..., gain_index] = gains_std
```
%% Cell type:code id: tags:
``` python
# Add bad pixels due to bad gain separation
if not fixed_gain_mode:
for qm in gain_g.keys():
for g in range(2):
# Bad pixels during bad gain separation.
# Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
BadPixels.GAIN_THRESHOLDING_ERROR
```
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
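Concretely, the first two threshold maps are the midpoints between adjacent gain-bit medians, computed per pixel and memory cell; a sketch with made-up values for one pixel and cell:
%% Cell type:code id: tags:
``` python
# Hypothetical gain-bit medians of one pixel/cell for high, medium and low gain.
gain_hg, gain_mg, gain_lg = 5000.0, 20000.0, 35000.0

threshold_hg_mg = (gain_hg + gain_mg) / 2  # switching point high -> medium
threshold_mg_lg = (gain_mg + gain_lg) / 2  # switching point medium -> low
print(threshold_hg_mg, threshold_mg_lg)    # 12500.0 27500.0
```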
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
thresholds_g = {}
for qm in gain_g.keys():
thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
for i in range(3):
thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[qm] = {
'Offset': offset_g[qm],
'Noise': noise_g[qm],
'BadPixelsDark': badpix_g[qm]
}
if not fixed_gain_mode:
res[qm]['ThresholdsDark'] = thresholds_g[qm]
```
%% Cell type:code id: tags:
``` python
# set the operating condition
# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
condition = iCalibrationDB.Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
gain_mode=fixed_gain_mode,
integration_time=integration_time
)
```
%% Cell type:code id: tags:
``` python
# Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
qm_dict = OrderedDict()
all_pdus = get_pdu_from_db(
karabo_id,
karabo_da,
constant=iCalibrationDB.CalibrationConstant(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time.isoformat() if creation_time else None,
timeout=cal_db_timeout
)
for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
qm = module_index_to_qm(module_index)
qm_dict[qm] = {
"karabo_da": module_da,
"db_module": module_pdu
}
```
%% Cell type:markdown id: tags:
## Sending calibration constants to the database.
%% Cell type:code id: tags:
``` python
md = None
for qm in res:
db_module = qm_dict[qm]["db_module"]
for const in res[qm]:
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
dconst.data = res[qm][const]
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
report, cal_db_interface, creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Retrieving previous calibration constants for comparison.
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
qm_x_const = [(qm, const) for qm in res for const in res[qm]]
def retrieve_old_constant(qm, const):
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=qm_dict[qm]["karabo_da"],
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, qm_x_const
)
old_retrieval_pool.close()
```
%% Cell type:code id: tags:
``` python
mnames=[]
for i in modules:
qm = module_index_to_qm(i)
mnames.append(qm)
display(Markdown(f'## Position of the module {qm} and its ASICs'))
show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identifying potential effects common to all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring on the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:code id: tags:
``` python
if high_res_badpix_3d:
cols = {
BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
}
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This suppresses isolated single bad pixels.
Hence, only bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
gnames = ['High Gain', 'Medium Gain', 'Low Gain']
for gain in range(3):
display(Markdown(f'### {gnames[gain]} ###'))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
# Plot only three gain threshold maps.
bp_thresh = OrderedDict()
for mod, con in badpix_g.items():
bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
bp_thresh[mod][...,:2] = con[...,:2]
bp_thresh[mod][...,2:] = con
create_constant_overview(thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
badpixels=[bp_thresh, np.nan],
gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
marker=['d','d','','','']
)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(bool).astype(float)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
old_const.setdefault(qm, {})[const] = data
old_mdata.setdefault(qm, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("The following pre-existing constants are used for comparison:"))
for qm, consts in old_mdata.items():
display(Markdown(f"- {qm}"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_dict[qm]["db_module"],
"old-constants": old_mdata[qm]
},
fd,
)
```
%% Cell type:code id: tags:
``` python
table = []
gain_names = ['High', 'Medium', 'Low']
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
for qm in badpix_g.keys():
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_g[qm][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))
if old_const[qm]['BadPixelsDark'] is not None:
dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']
        l_threshold = ['', f'{thresholds_noise_sigma} {thresholds_noise_hard[gain]}',
                       f'{thresholds_offset_sigma} {thresholds_offset_hard[gain]}',
                       '', f'{thresholds_gain_sigma}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]
if old_const[qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels
A pixel can be flagged as bad for several reasons; therefore the sum over all bad-pixel types can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
if fixed_gain_mode:
constants = ['Offset', 'Noise']
else:
constants = ['Offset', 'Noise', 'ThresholdsDark']
constants_x_qms = list(itertools.product(constants, res.keys()))
def compute_table(const, qm):
if const == 'ThresholdsDark':
table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
else:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
compare_with_old_constant = old_const[qm][const] is not None and \
old_const[qm]['BadPixelsDark'] is not None
data = np.copy(res[qm][const])
if const == 'ThresholdsDark':
data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data[res[qm]['BadPixelsDark']>0] = np.nan
if compare_with_old_constant:
data_old = np.copy(old_const[qm][const])
if const == 'ThresholdsDark':
data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data_old[old_const[qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
def compute_row(i):
line = [n_list[i]]
for gain in range(3):
# Compare only 3 threshold gain-maps
if gain == 2 and const == 'ThresholdsDark':
continue
stat_measure = f_list[i](data[...,gain])
line.append(f"{stat_measure:6.1f}")
if compare_with_old_constant:
old_stat_measure = f_list[i](data_old[...,gain])
line.append(f"{old_stat_measure:6.1f}")
else:
line.append("-")
return line
with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
rows = pool.map(compute_row, range(len(f_list)))
table.extend(rows)
return table
with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
tables = pool.starmap(compute_table, constants_x_qms)
for (const, qm), table in zip(constants_x_qms, tables):
display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id:bed7bd15-21d9-4735-82c1-c27c1a5e3346 tags:
# Gotthard2 Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the Gotthard2 Detector
%% Cell type:code id:570322ed-f611-4fd1-b2ec-c12c13d55843 tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202221/p003225/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 50 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per node if notebook executed through xfel-calibrate, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "FXE_XAD_G2XES" # karabo prefix of Gotthard-II devices
karabo_da = ["GH201"] # data aggregators
receiver_template = "RECEIVER" # receiver template used to read INSTRUMENT keys.
control_template = "CONTROL" # control template used to read CONTROL keys.
instrument_source_template = "{}/DET/{}:daqOutput" # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = "{}/DET/{}" # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for calibration database.
cal_db_interface = "tcp://max-exfl016:8016#8025" # the database interface to use.
cal_db_timeout = 180000 # timeout on caldb requests.
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameters affecting corrected data.
constants_file = "" # Use constants in given constant file path. /gpfs/exfel/data/scratch/ahmedk/dont_remove/gotthard2/constants/calibration_constants_GH2.h5
offset_correction = True # apply offset correction. This can be disabled to only apply LUT or apply LUT and gain correction for non-linear differential results.
gain_correction = True # apply gain correction.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameter conditions.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
# Parameters for plotting
skip_plots = False # exit after writing corrected files
pulse_idx_preview = 3 # pulse index to preview. The following even/odd pulse index is used for preview. # TODO: update to pulseId preview.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id:6e9730d8-3908-41d7-abe2-d78e046d5de2 tags:
``` python
import warnings
from logging import warning
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
from extra_data import RunDirectory, H5File
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import CalCatError, GOTTHARD2_CalibrationData
from cal_tools.files import DataFile
from cal_tools.gotthard2 import gotthard2algs, gotthard2lib
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
calcat_creation_time,
CalibrationMetadata,
)
from XFELDetAna.plotting.heatmap import heatmapPlot
warnings.filterwarnings('ignore')
%matplotlib inline
```
%% Cell type:code id:d7c02c48-4429-42ea-a42e-de45366d7fa3 tags:
``` python
in_folder = Path(in_folder)
run_folder = in_folder / f"r{run:04d}"
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
if not karabo_id_control:
karabo_id_control = karabo_id
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
ctrl_src = ctrl_source_template.format(karabo_id_control, control_template)
print(f"Process modules: {karabo_da} for run {run}")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id:b5eb816e-b5f2-44ce-9907-0273d82341b6 tags:
``` python
# Select only sequence files to process for the selected detector.
if sequences == [-1]:
possible_patterns = list(f"*{mod}*.h5" for mod in karabo_da)
else:
possible_patterns = list(
f"*{mod}-S{s:05d}.h5" for mod in karabo_da for s in sequences
)
run_folder = Path(in_folder / f"r{run:04d}")
seq_files = [
f for f in run_folder.glob("*.h5") if any(f.match(p) for p in possible_patterns)
]
seq_files = sorted(seq_files)
if not seq_files:
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
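%% Cell type:markdown id: tags:
For reference, the glob patterns built above are intended to match the usual RAW sequence file naming; a small check with a hypothetical file name:
%% Cell type:code id: tags:
``` python
from pathlib import PurePath

# Hypothetical sequence file for data aggregator GH201, sequence 0.
example = PurePath("RAW-R0050-GH201-S00000.h5")
print(example.match("*GH201*.h5"))        # True  (sequences == [-1])
print(example.match("*GH201-S00000.h5"))  # True  (sequence 0 selected)
print(example.match("*GH201-S00001.h5"))  # False
```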
%% Cell type:code id:f9a8d1eb-ce6a-4ed0-abf4-4a6029734672 tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id:892172d8 tags:
``` python
# Read slow data
run_dc = RunDirectory(run_folder)
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
if bias_voltage == -1:
bias_voltage = g2ctrl.get_bias_voltage()
if exposure_time == -1:
exposure_time = g2ctrl.get_exposure_time()
if exposure_period == -1:
exposure_period = g2ctrl.get_exposure_period()
if acquisition_rate == -1:
acquisition_rate = g2ctrl.get_acquisition_rate()
if single_photon == -1:
single_photon = g2ctrl.get_single_photon()
print("Bias Voltage:", bias_voltage)
print("Exposure Time:", exposure_time)
print("Exposure Period:", exposure_period)
print("Acquisition Rate:", acquisition_rate)
print("Single Photon:", single_photon)
```
%% Cell type:markdown id:8c852392-bb19-4c40-b2ce-3b787538a92d tags:
### Retrieving calibration constants
%% Cell type:code id:5717d722 tags:
``` python
da_to_pdu = {}
# Used for old FXE (p003225) runs before adding Gotthard2 to CALCAT
const_data = dict()
g2_cal = GOTTHARD2_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
exposure_time=exposure_time,
exposure_period=exposure_period,
acquisition_rate=acquisition_rate,
single_photon=single_photon,
event_at=creation_time,
client=rest_cfg.calibration_client(),
)
# Keep as long as it is essential to correct
# RAW data (FXE p003225) before the data mapping was added to CALCAT.
try: # in case local constants are used with old RAW data. This can be removed in the future.
for mod_info in g2_cal.physical_detector_units.values():
da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]
db_modules = [da_to_pdu[da] for da in karabo_da]
except CalCatError as e:
print(e)
db_modules = [None] * len(karabo_da)
if constants_file:
for mod in karabo_da:
const_data[mod] = dict()
# load constants temporarily using defined local paths.
with h5py.File(constants_file, "r") as cfile:
const_data[mod]["LUTGotthard2"] = cfile["LUT"][()]
const_data[mod]["OffsetGotthard2"] = cfile["offset_map"][()].astype(np.float32)
const_data[mod]["RelativeGainGotthard2"] = cfile["gain_map"][()].astype(np.float32)
const_data[mod]["Mask"] = cfile["bpix_ff"][()].astype(np.uint32)
else:
if const_yaml:
const_data = dict()
for mod in karabo_da:
const_data[mod] = dict()
for cname, mdata in const_yaml[mod]["constants"].items():
const_data[mod][cname] = dict()
if mdata["creation-time"]:
with h5py.File(mdata["path"], "r") as cf:
const_data[mod][cname] = np.copy(
cf[f"{mdata['dataset']}/data"])
else:
mdata_dict = {"constants": dict()}
constant_names = ["LUTGotthard2", "OffsetGotthard2", "BadPixelsDarkGotthard2"]
if gain_correction:
constant_names += ["RelativeGainGotthard2", "BadPixelsFFGotthard2"]
        # Retrieve metadata for all Gotthard2 constants.
const_data = g2_cal.ndarray_map(constant_names)
# Validate the constants availability and raise/warn correspondingly.
for mod, calibrations in const_data.items():
dark_constants = {"LUTGotthard2"}
if offset_correction:
dark_constants |= {"OffsetGotthard2", "BadPixelsDarkGotthard2"}
missing_dark_constants = dark_constants - set(calibrations)
if missing_dark_constants:
karabo_da.remove(mod)
warning(f"Dark constants {missing_dark_constants} are not available to correct {mod}.") # noqa
missing_gain_constants = {
"BadPixelsFFGotthard2", "RelativeGainGotthard2"} - set(calibrations)
    if gain_correction and missing_gain_constants:
        warning(f"Gain constants {missing_gain_constants} are not retrieved for mod {mod}. "
                "Gain correction is disabled for this module.")
# Create the mask array.
bpix = const_data[mod].get("BadPixelsDarkGotthard2")
if bpix is None:
bpix = np.zeros((1280, 2, 3), dtype=np.uint32)
if const_data[mod].get("BadPixelsFFGotthard2") is not None:
bpix |= const_data[mod]["BadPixelsFFGotthard2"]
const_data[mod]["Mask"] = bpix
# Prepare empty arrays for missing constants.
if const_data[mod].get("OffsetGotthard2") is None:
const_data[mod]["OffsetGotthard2"] = np.zeros(
(1280, 2, 3), dtype=np.float32)
if const_data[mod].get("RelativeGainGotthard2") is None:
const_data[mod]["RelativeGainGotthard2"] = np.ones(
(1280, 2, 3), dtype=np.float32)
if gain_correction:
gain_correction = False
warning("Gain correction is disabled for this module.")
const_data[mod]["RelativeGainGotthard2"] = const_data[mod]["RelativeGainGotthard2"].astype( # noqa
np.float32, copy=False) # Old gain constants are not float32.
if not karabo_da:
    raise ValueError("Dark constants are not available for any of the selected modules.")
```
%% Cell type:code id:23fcf7f4-351a-4df7-8829-d8497d94fecc tags:
``` python
context = psh.ProcessContext(num_workers=23)
```
%% Cell type:code id:daecd662-26d2-4cb8-aa70-383a579cf9f9 tags:
``` python
def correct_train(wid, index, d):
g = gain[index]
gotthard2algs.convert_to_10bit(d, const_data[mod]["LUTGotthard2"], data_corr[index, ...])
gotthard2algs.correct_train(
data_corr[index, ...],
mask[index, ...],
g,
const_data[mod]["OffsetGotthard2"],
const_data[mod]["RelativeGainGotthard2"],
const_data[mod]["Mask"],
apply_offset=offset_correction,
apply_gain=gain_correction,
)
```
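%% Cell type:markdown id: tags:
`correct_train` is executed once per entry along the first axis of the raw data through pasha's shared-memory map: output arrays are allocated via the context and every call writes into its own index. A minimal, self-contained sketch of that pattern, independent of the Gotthard2-specific algorithms (the toy arrays below are made up):
%% Cell type:code id: tags:
``` python
import numpy as np
import pasha as psh

ctx = psh.ProcessContext(num_workers=4)

raw = np.arange(12, dtype=np.float32).reshape(6, 2)   # toy "raw" data
corr = ctx.alloc(shape=raw.shape, dtype=np.float32)   # shared output array

def double(wid, index, entry):
    # One call per entry along the first axis; results land in the shared array.
    corr[index] = entry * 2

ctx.map(double, raw)
print(corr)
```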
%% Cell type:code id:f88c1aa6-a735-4b72-adce-b30162f5daea tags:
``` python
for mod in karabo_da:
    # This is used in case the receiver template contains the karabo
    # data aggregator index, e.g. for the detector at DETLAB.
instr_mod_src = instrument_src.format(mod[-2:])
data_path = "INSTRUMENT/" + instr_mod_src + "/data"
for raw_file in seq_files:
step_timer.start()
dc = H5File(raw_file)
out_file = out_folder / raw_file.name.replace("RAW", "CORR")
# Select module INSTRUMENT source and deselect empty trains.
dc = dc.select(instr_mod_src, require_all=True)
data = dc[instr_mod_src, "data.adc"].ndarray()
gain = dc[instr_mod_src, "data.gain"].ndarray()
step_timer.done_step("preparing raw data")
dshape = data.shape
step_timer.start()
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask = context.alloc(shape=dshape, dtype=np.uint32)
context.map(correct_train, data)
step_timer.done_step("Correcting one sequence file")
step_timer.start()
        # The provided PSI gain map contains zero values, which can lead to inf
        # in the corrected data; set inf values to NaN.
        # TODO: This can maybe be removed after creating XFEL gain maps.
data_corr[np.isinf(data_corr)] = np.nan
# Create CORR files and add corrected data sections.
image_counts = dc[instrument_src, "data.adc"].data_counts(labelled=False)
with DataFile(out_file, "w") as ofile:
# Create INDEX datasets.
ofile.create_index(dc.train_ids, from_file=dc.files[0])
            # Create METADATA datasets
ofile.create_metadata(
like=dc,
sequence=dc.run_metadata()["sequenceNumber"],
instrument_channels=(f"{instrument_src}/data",)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected trainId in the corrected file.
outp_source.create_key(
f"data.trainId", data=dc.train_ids,
chunks=min(50, len(dc.train_ids))
)
# Create datasets with the available corrected data
for field_name, field_data in {
"adc": data_corr,
"gain": gain,
}.items():
outp_source.create_key(
f"data.{field_name}", data=field_data,
chunks=((chunks_data,) + data_corr.shape[1:])
)
for field in ["bunchId", "memoryCell", "frameNumber", "timestamp"]:
outp_source.create_key(
f"data.{field}", data=dc[instr_mod_src, f"data.{field}"].ndarray(),
chunks=(chunks_data, data_corr.shape[1])
)
outp_source.create_compressed_key(f"data.mask", data=mask)
step_timer.done_step("Storing data")
```
%% Cell type:code id:94b8e4d2-9f8c-4c23-a509-39238dd8435c tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id:0ccc7f7e-2a3f-4ac0-b854-7d505410d2fd tags:
``` python
if skip_plots:
print("Skipping plots")
import sys
sys.exit(0)
```
%% Cell type:code id:ff203f77-3811-46f3-bf7d-226d2dcab13f tags:
``` python
mod_dcs = {}
first_seq_raw = seq_files[0]
first_seq_corr = out_folder / first_seq_raw.name.replace("RAW", "CORR")
for mod in karabo_da:
mod_dcs[mod] = {}
with H5File(first_seq_corr) as out_dc:
tid, mod_dcs[mod]["train_corr_data"] = next(
out_dc[instr_mod_src, "data.adc"].trains()
)
with H5File(first_seq_raw) as in_dc:
train_dict = in_dc.train_from_id(tid)[1][instr_mod_src]
mod_dcs[mod]["train_raw_data"] = train_dict["data.adc"]
mod_dcs[mod]["train_raw_gain"] = train_dict["data.gain"]
```
%% Cell type:code id:1b379438-eb1d-42b2-ac83-eb8cf88c46db tags:
``` python
display(Markdown("### Mean RAW and CORRECTED across pulses for one train:"))
display(Markdown(f"Train: {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 10))
raw_data = mod_dcs[mod]["train_raw_data"]
im = ax.plot(np.mean(raw_data, axis=0))
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
fig, ax = plt.subplots(figsize=(20, 10))
corr_data = mod_dcs[mod]["train_corr_data"]
im = ax.plot(np.mean(corr_data, axis=0))
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV. output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
step_timer.done_step("Plotting mean data")
```
%% Cell type:code id:58a6a276 tags:
``` python
display(Markdown(f"### RAW and CORRECTED strips across pulses for train {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
for plt_data, dname in zip(
["train_raw_data", "train_corr_data"], ["RAW", "CORRECTED"]
):
fig, ax = plt.subplots(figsize=(15, 20))
plt.rcParams.update({"font.size": 20})
heatmapPlot(
mod_dcs[mod][plt_data],
y_label="Pulses",
x_label="Strips",
title=f"{dname} module {mod} ({pdu})",
use_axis=ax,
)
pass
step_timer.done_step("Plotting RAW and CORRECTED data for one train")
```
%% Cell type:code id:cd8f5e08-fcee-4bff-ba63-6452b3d892a2 tags:
``` python
# Validate given "pulse_idx_preview"
if pulse_idx_preview + 1 > data.shape[1]:
print(
f"WARNING: selected pulse_idx_preview {pulse_idx_preview} is not available in data."
" Previewing 1st pulse."
)
pulse_idx_preview = 1
if data.shape[1] == 1:
odd_pulse = 1
even_pulse = None
else:
odd_pulse = pulse_idx_preview if pulse_idx_preview % 2 else pulse_idx_preview + 1
even_pulse = (
pulse_idx_preview if not (pulse_idx_preview % 2) else pulse_idx_preview + 1
)
```
%% Cell type:code id:e5f0d4d8-e32c-4f2c-8469-4ebbfd3f644c tags:
``` python
display(Markdown("### RAW and CORRECTED even/odd pulses for one train:"))
display(Markdown(f"Train: {tid}"))
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 20))
raw_data = mod_dcs[mod]["train_raw_data"]
corr_data = mod_dcs[mod]["train_corr_data"]
ax.plot(raw_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(raw_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC RAW", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
fig, ax = plt.subplots(figsize=(20, 20))
ax.plot(corr_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(corr_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV CORRECTED", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
step_timer.done_step("Plotting RAW and CORRECTED odd/even pulses.")
```
......
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input.
xgm_source = 'SA1_XTD2_XGM/DOOCS/MAIN'
xgm_pulse_count_key = 'pulseEnergy.numberOfSa1BunchesActual'
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = '' # Not needed, compatibility with current webservice.
cal_db_timeout = 0 # Not needed, compatibility with current webservice.
cal_db_root = '/gpfs/exfel/d/cal/caldb_store' # The calibration database root path to access constant files. For example accessing constants from the test database.
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = 'auto' # Whether to use memory cell order as a detector condition; auto/always/never
# Correction parameters
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.
# Output options
ignore_no_frames_no_pulses = False # Whether to run without SA1 pulses AND frames.
overwrite = True # set to True if existing data should be overwritten
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.
create_virtual_cxi_in = '' # Folder to create virtual CXI files in (for each sequence).
# Parallelization options
sequences_per_node = 1 # Sequence files to process per node
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.
num_threads_per_worker = 32 # Number of threads per worker.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
from logging import warning
from pathlib import Path
from time import perf_counter
import gc
import re
import warnings
import numpy as np
import h5py
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh
from extra_data.components import LPD1M
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import CalCatError, LPD_CalibrationData
from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.lpdlib import get_mem_cell_pattern, make_cell_order_condition
from cal_tools.tools import (
    CalibrationMetadata,
    calcat_creation_time,
    write_constants_fragment,
)
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
``` python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$') # This should probably move to cal_tools
run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
output_source = output_source or input_source
cal_db_root = Path(cal_db_root)
metadata = CalibrationMetadata(metadata_folder or out_folder)
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
    if not modules or modules == [-1]:
        modules = list(range(16))
    karabo_da = [f'LPD{i:02d}' for i in modules]
else:
    modules = [int(x[-2:]) for x in karabo_da]
# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
do_sequence = lambda seq: True
else:
do_sequence = [int(x) for x in sequences].__contains__
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
if use_cell_order not in {'auto', 'always', 'never'}:
raise ValueError("use_cell_order must be auto/always/never")
```
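%% Cell type:markdown id: tags:
The `do_sequence` predicate defined above is either a constant-true lambda or the bound `__contains__` method of the selected sequence list; a quick sketch of its behaviour:
%% Cell type:code id: tags:
``` python
# Explicit selection: membership test on the given sequence numbers.
do_seq = [int(x) for x in [0, 2]].__contains__
print(do_seq(0), do_seq(1), do_seq(2))  # True False True

# sequences == [-1]: every sequence number is accepted.
do_all = lambda seq: True
print(do_all(17))  # True
```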
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
``` python
data_to_process = []
for inp_path in run_folder.glob('RAW-*.h5'):
match = file_re.match(inp_path.stem)
if match[2] not in karabo_da or not do_sequence(int(match[3])):
continue
outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
run=int(match[1]), aggregator=match[2], seq=int(match[3]))
data_to_process.append((match[2], inp_path, outp_path))
print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[0]}{x[1]}'):
print(f'{data_descr[0]}\t{data_descr[1]}')
# Collect the train ID contained in the input LPD files.
inp_lpd_dc = xd.DataCollection.from_paths([x[1] for x in data_to_process])
frame_count = sum([
int(inp_lpd_dc[source, 'image.data'].data_counts(labelled=False).sum())
for source in inp_lpd_dc.all_sources], 0)
if frame_count == 0:
inp_dc = xd.RunDirectory(run_folder) \
.select_trains(xd.by_id[inp_lpd_dc.train_ids])
try:
pulse_count = int(inp_dc[xgm_source, xgm_pulse_count_key].ndarray().sum())
except xd.SourceNameError:
warning(f'Missing XGM source `{xgm_source}`')
pulse_count = None
except xd.PropertyNameError:
warning(f'Missing XGM pulse count key `{xgm_pulse_count_key}`')
pulse_count = None
if pulse_count == 0 and not ignore_no_frames_no_pulses:
warning(f'Affected files contain neither LPD frames nor SA1 pulses '
f'according to {xgm_source}, processing is skipped. If this '
                f'is incorrect, please contact da-support@xfel.eu')
from sys import exit
exit(0)
elif pulse_count is None:
raise ValueError('Affected files contain no LPD frames and SA1 pulses '
'could not be inferred from XGM data')
else:
raise ValueError('Affected files contain no LPD frames but SA1 pulses')
else:
print(f'Total number of LPD pulses across all modules: {frame_count}')
```
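For reference, the `file_re` pattern used above splits a raw file stem into run number, aggregator and sequence index. A small sketch with a made-up file name:
``` python
import re

file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$')

# Hypothetical file RAW-R0010-LPD03-S00002.h5 -> match its stem (no extension):
match = file_re.match('RAW-R0010-LPD03-S00002')
print(int(match[1]), match[2], int(match[3]))  # 10 LPD03 2
```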
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
start = perf_counter()
cell_ids_pattern_s = None
if use_cell_order != 'never':
# Read the order of memory cells used
raw_data = xd.DataCollection.from_paths([e[1] for e in data_to_process])
cell_ids_pattern_s = make_cell_order_condition(
use_cell_order, get_mem_cell_pattern(raw_data, det_inp_sources)
)
print("Memory cells order:", cell_ids_pattern_s)
lpd_cal = LPD_CalibrationData(
detector_name=karabo_id,
modules=karabo_da,
sensor_bias_voltage=bias_voltage,
memory_cells=mem_cells,
feedback_capacitor=capacitor,
source_energy=photon_energy,
memory_cell_order=cell_ids_pattern_s,
category=category,
event_at=creation_time,
client=rest_cfg.calibration_client(),
caldb_root=Path(cal_db_root),
)
lpd_metadata = lpd_cal.metadata(["Offset", "BadPixelsDark"])
try:
illum_metadata = lpd_cal.metadata(lpd_cal.illuminated_calibrations)
for key, value in illum_metadata.items():
lpd_metadata.setdefault(key, {}).update(value)
except CalCatError as e: # TODO: replace when API errors are improved.
warning(f"CalCatError: {e}")
total_time = perf_counter() - start
print(f'Looking up constants {total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
const_yaml = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)
if const_yaml: # Read constants from YAML file.
start = perf_counter()
for da, ccvs in const_yaml.items():
# Validate the constants availability and raise/warn accordingly.
for mod, calibrations in lpd_metadata.items():
missing_offset = {"Offset"} - set(calibrations)
warn_missing_constants = {
"BadPixelsDark", "BadPixelsFF", "GainAmpMap",
"FFMap", "RelativeGain"} - set(calibrations)
if missing_offset:
warning(f"Offset constant is not available to correct {mod}.")
karabo_da.remove(mod)
if warn_missing_constants:
warning(f"Constants {warn_missing_constants} were not retrieved for {mod}.")
if not karabo_da: # Offsets are missing for all modules.
raise Exception("Could not find offset constants for any modules, will not correct data.")
for calibration_name, ccv in ccvs['constants'].items():
if ccv['file-path'] is None:
warnings.warn(f"Missing {calibration_name} for {da}")
continue
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['file-path']),
dataset=ccv['dataset-name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
else: # Retrieve constants from CALCAT.
dark_calibrations = {
1: 'Offset', # np.float32
14: 'BadPixelsDark' # should be np.uint32, but is np.float64
}
base_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
if use_cell_order:
# Read the order of memory cells used
raw_data = xd.DataCollection.from_paths([e[1] for e in data_to_process])
cell_ids_pattern_s = get_mem_cell_order(raw_data, det_inp_sources)
print("Memory cells order:", cell_ids_pattern_s)
dark_condition = base_condition + [
dict(parameter_id=30, value=cell_ids_pattern_s), # Memory cell order
]
else:
dark_condition = base_condition.copy()
illuminated_calibrations = {
20: 'BadPixelsFF', # np.uint32
42: 'GainAmpMap', # np.float32
43: 'FFMap', # np.float32
44: 'RelativeGain' # np.float32
}
illuminated_condition = base_condition + [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da='', event_at=creation_time.isoformat()
)
if not resp['success']:
raise RuntimeError(resp)
for ccv in resp['data']:
cc = ccv['calibration_constant']
da = ccv['physical_detector_unit']['karabo_da']
calibration_name = calibrations[cc['calibration_id']]
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['path_to_file']) / ccv['file_name'],
dataset=ccv['data_set_name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
print('.', end='', flush=True)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
# Remove skipped correction modules from data_to_process
data_to_process = [(mod, in_f, out_f) for mod, in_f, out_f in data_to_process if mod in karabo_da]
```
%% Cell type:code id: tags:
``` python
def load_constant_dataset(wid, index, const_descr):
ccv_entry = const_data[const_descr]
# write constants metadata to fragment YAML
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=lpd_metadata,
caldb_root=lpd_cal.caldb_root,
)
with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])
print('.', end='', flush=True)
print('Loading calibration data', end='', flush=True)
start = perf_counter()
const_load_mp.map(load_constant_dataset, list(const_data.keys()))
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
# Load constants data for all constants
const_data = lpd_cal.ndarray_map(metadata=lpd_metadata)
```
%% Cell type:code id: tags:
``` python
# These are intended in order cell, X, Y, gain
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}
ccv_shape = (mem_cells, 256, 256, 3)
constant_order = {
'Offset': (2, 1, 0, 3),
'BadPixelsDark': (2, 1, 0, 3),
    'RelativeGain': (2, 0, 1, 3),
'FFMap': (2, 0, 1, 3),
'BadPixelsFF': (2, 0, 1, 3),
'GainAmpMap': (2, 0, 1, 3),
}
def prepare_constants(wid, index, aggregator):
consts = {calibration_name: entry['data']
for (aggregator_, calibration_name), entry
in const_data.items()
if aggregator == aggregator_}
consts = const_data.get(aggregator, {})
def _prepare_data(calibration_name, dtype):
# Some old BadPixels constants have <f8 dtype.
# Convert nan to float 0 to avoid having 2147483648 after
# converting float64 to uint32.
if "BadPixels" in calibration_name and consts[calibration_name].dtype != np.uint32:
consts[calibration_name] = np.nan_to_num(
consts[calibration_name], nan=0.0)
return consts[calibration_name] \
.transpose(constant_order[calibration_name]) \
.astype(dtype, copy=True) # Make sure array is contiguous.
if offset_corr and 'Offset' in consts:
ccv_offsets[aggregator] = _prepare_data('Offset', np.float32)
else:
ccv_offsets[aggregator] = np.zeros(ccv_shape, dtype=np.float32)
ccv_gains[aggregator] = np.ones(ccv_shape, dtype=np.float32)
if 'BadPixelsDark' in consts:
ccv_masks[aggregator] = _prepare_data('BadPixelsDark', np.uint32)
else:
ccv_masks[aggregator] = np.zeros(ccv_shape, dtype=np.uint32)
if rel_gain and 'RelativeGain' in consts:
ccv_gains[aggregator] *= _prepare_data('RelativeGain', np.float32)
if ff_map and 'FFMap' in consts:
ccv_gains[aggregator] *= _prepare_data('FFMap', np.float32)
if 'BadPixelsFF' in consts:
np.bitwise_or(ccv_masks[aggregator], _prepare_data('BadPixelsFF', np.uint32),
out=ccv_masks[aggregator])
if gain_amp_map and 'GainAmpMap' in consts:
ccv_gains[aggregator] *= _prepare_data('GainAmpMap', np.float32)
print('.', end='', flush=True)
print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
const_data.clear() # Clear raw constants data now to save memory.
gc.collect();
```
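The `constant_order` tuples above only permute the axes of the stored constants into the `(cell, slow_scan, fast_scan, gain)` layout expected by `correct_lpd_frames`; the two variants differ in how the pixel axes are swapped. A shape-only sketch (512 memory cells assumed purely for illustration):
``` python
import numpy as np

# Constants are loaded with shape (256, 256, cells, gains); the transpose moves
# the memory-cell axis to the front so the result matches ccv_shape.
stored = np.zeros((256, 256, 512, 3), dtype=np.float32)
print(stored.transpose(2, 1, 0, 3).shape)  # (512, 256, 256, 3)
print(stored.transpose(2, 0, 1, 3).shape)  # (512, 256, 256, 3), pixel axes swapped
```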
%% Cell type:code id: tags:
``` python
def correct_file(wid, index, work):
aggregator, inp_path, outp_path = work
module_index = int(aggregator[-2:])
start = perf_counter()
dc = xd.H5File(inp_path, inc_suspect_trains=False).select('*', 'image.*', require_all=True)
inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
open_time = perf_counter() - start
# Load raw data for this file.
# Reshaping gets rid of the extra 1-len dimensions without
# mangling the frame axis for an actual frame count of 1.
start = perf_counter()
in_raw = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
read_time = perf_counter() - start
# Allocate output arrays.
out_data = np.zeros((in_raw.shape[0], 256, 256), dtype=np.float32)
out_gain = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint8)
out_mask = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint32)
start = perf_counter()
correct_lpd_frames(in_raw, in_cell,
out_data, out_gain, out_mask,
ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
num_threads=num_threads_per_worker)
correct_time = perf_counter() - start
image_counts = inp_source['image.data'].data_counts(labelled=False)
start = perf_counter()
if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)
with DataFile(outp_path, 'w') as outp_file:
outp_file.create_index(dc.train_ids, from_file=dc.files[0])
outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))
outp_source = outp_file.create_instrument_source(outp_source_name)
outp_source.create_index(image=image_counts)
outp_source.create_key('image.cellId', data=in_cell,
chunks=(min(chunks_ids, in_cell.shape[0]),))
outp_source.create_key('image.pulseId', data=in_pulse,
chunks=(min(chunks_ids, in_pulse.shape[0]),))
outp_source.create_key('image.data', data=out_data,
chunks=(min(chunks_data, out_data.shape[0]), 256, 256))
outp_source.create_compressed_key('image.gain', data=out_gain)
outp_source.create_compressed_key('image.mask', data=out_mask)
write_time = perf_counter() - start
total_time = open_time + read_time + correct_time + write_time
frame_rate = in_raw.shape[0] / total_time
print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
in_raw.shape[0], frame_rate))
in_raw = None
in_cell = None
in_pulse = None
out_data = None
out_gain = None
out_mask = None
gc.collect()
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
```
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
``` python
geom = xg.LPD_1MGeometry.from_quad_positions(
[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])
output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
if not output_paths:
warning('Data preview is skipped as there are no existing output paths')
from sys import exit
exit(0)
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])
det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
``` python
left_edge_ratio = 0.01
right_edge_ratio = 0.99
fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=2000, range=(-1500, 2000))
def find_nearest_index(array, value):
return (np.abs(array - value)).argmin()
cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]
max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
color='red', rotation=90, ha='left', va='center', size='x-large')
ax.set_xlim(vmin-(vmax-vmin)*0.1, vmax+(vmax-vmin)*0.1)
ax.set_ylim(0, max_value*1.1)
pass
```
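The cumulative-histogram lookup above effectively selects the 1st and 99th percentiles of the corrected intensities as the colormap interval. An approximate, shorter equivalent (assuming the `data`, `left_edge_ratio` and `right_edge_ratio` defined in the cell above) would be:
``` python
# Approximate equivalent of the vmin/vmax derivation above, using percentiles
# directly instead of a binned cumulative sum (ignores the histogram range clip).
vmin_approx, vmax_approx = np.nanpercentile(
    np.ravel(data.data), [left_edge_ratio * 100, right_edge_ratio * 100])
print(vmin_approx, vmax_approx)
```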
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
``` python
highest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))
fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(highest_gain_stage, ax=ax, vmin=0, vmax=2);
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
%% Cell type:markdown id: tags:
### Create virtual CXI file
%% Cell type:code id: tags:
``` python
if create_virtual_cxi_in:
vcxi_folder = Path(create_virtual_cxi_in.format(
run=run, proposal_folder=str(Path(in_folder).parent)))
vcxi_folder.mkdir(parents=True, exist_ok=True)
def sort_files_by_seq(by_seq, outp_path):
by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
return by_seq
from functools import reduce
reduce(sort_files_by_seq, output_paths, output_by_seq := {})
for seq_number, seq_output_paths in output_by_seq.items():
# Create data collection and detector components only for this sequence.
try:
det = LPD1M(xd.DataCollection.from_paths(seq_output_paths), detector_name=karabo_id, min_modules=4)
except ValueError: # Couldn't find enough data for min_modules
continue
det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')
```
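The `reduce` call above groups the output files by the sequence number encoded in the last five characters of their stem. For readers less familiar with `functools.reduce`, an equivalent plain-loop sketch:
``` python
# Equivalent grouping without functools.reduce:
# map each sequence number to the list of corrected module files belonging to it.
output_by_seq = {}
for outp_path in output_paths:
    output_by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
```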
%% Cell type:markdown id: tags:
# LPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
The following notebook retrieves constants metadata and stores it in a YAML file, to be used while correcting LPD images.
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = False # Whether to use memory cell order as a detector condition (not stored for older constants)
```
%% Cell type:code id: tags:
``` python
from pathlib import Path
from time import perf_counter
import numpy as np
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
from cal_tools.lpdlib import get_mem_cell_order
from cal_tools.tools import (
CalibrationMetadata,
calcat_creation_time,
save_constant_metadata,
)
from cal_tools.restful_config import restful_config
```
%% Cell type:code id: tags:
``` python
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
if not modules or modules == [-1]:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
dark_calibrations = {
1: 'Offset',
14: 'BadPixelsDark',
}
base_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
if use_cell_order:
# Read the order of memory cells used
raw_data = xd.RunDirectory(Path(in_folder, f'r{run:04d}'))
cell_ids_pattern_s = get_mem_cell_order(raw_data, det_inp_sources)
print("Memory cells order:", cell_ids_pattern_s)
dark_condition = base_condition + [
dict(parameter_id=30, value=cell_ids_pattern_s), # Memory cell order
]
else:
dark_condition = base_condition.copy()
illuminated_calibrations = {
20: 'BadPixelsFF',
42: 'GainAmpMap',
43: 'FFMap',
44: 'RelativeGain',
}
illuminated_condition = base_condition + [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
```
%% Cell type:code id: tags:
``` python
const_data = {}
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for k_da in karabo_da:
pdu = None
retrieved_constants[k_da] = dict()
const_mdata = retrieved_constants[k_da]["constants"] = dict()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da=k_da, event_at=creation_time.isoformat())
if not resp["success"]:
print(f"ERROR: Constants {list(calibrations.values())} "
f"were not retrieved, {resp['app_info']}")
for cname in calibrations.values():
const_mdata[cname] = dict()
const_mdata[cname]["file-path"] = None
const_mdata[cname]["dataset-name"] = None
const_mdata[cname]["creation-time"] = None
continue
for ccv in resp["data"]:
cc = ccv['calibration_constant']
cname = calibrations[cc['calibration_id']]
const_mdata[cname] = dict()
const_mdata[cname]["file-path"] = str(Path(ccv['path_to_file']) / ccv['file_name'])
const_mdata[cname]["dataset-name"] = ccv['data_set_name']
const_mdata[cname]["creation-time"] = ccv['begin_at']
pdu = ccv['physical_detector_unit']['physical_name']
print('.', end='', flush=True)
retrieved_constants[k_da]["physical-detector-unit"] = pdu
metadata.save()
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
print(f"Stored retrieved constants in {metadata.filename}")
```
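After this cell, `calibration_metadata.yml` contains a `retrieved-constants` section keyed by aggregator, which the correction notebook later reads instead of querying the database again. A sketch of the resulting structure, expressed as the equivalent Python mapping (all paths, dataset names and timestamps below are invented placeholders):
``` python
# Sketch of the retrieved-constants structure saved to calibration_metadata.yml.
# Every value here is a made-up placeholder, not a real database entry.
retrieved_constants_example = {
    "LPD00": {
        "constants": {
            "Offset": {
                "file-path": "/path/to/caldb_store/some_constant_file.h5",
                "dataset-name": "/some/dataset/0/data",
                "creation-time": "2022-01-01T00:00:00.000+01:00",
            },
            # ... BadPixelsDark, BadPixelsFF, GainAmpMap, FFMap, RelativeGain ...
        },
        "physical-detector-unit": "LPD_EXAMPLE_PDU_NAME",
    },
}
```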
%% Cell type:markdown id: tags:
# LPD Mini Offset, Noise and Dead Pixels Characterization #
Author: M. Karnevskiy, S. Hauf
This notebook re-characterizes dark images to derive offset, noise and bad-pixel maps. All three types of constants are evaluated per-pixel and per-memory cell.
The notebook will correctly handle veto settings, but note that if you veto cells you will not be able to use these offsets for runs with different veto settings - vetoed cells will have zero offset.
The evaluated calibration constants are stored locally and injected in the calibration data base.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202231/p900316/raw/" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/kluyvert/darks-lpdmini" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run_high = 10 # run number in which high gain data was recorded, required
run_med = 11 # run number in which medium gain data was recorded, required
run_low = 12 # run number in which low gain data was recorded, required
karabo_id = "FXE_DET_LPD_MINI" # karabo karabo_id
karabo_da = [''] # a list of data aggregators names with module number, e.g. 'LPDMINI00/2', Default [''] for selecting all data aggregators
source_name = "{}/DET/0CH0:xtdf" # Source name for raw detector data
control_source_name = "{}/FPGA/FEM" # Source name for control device
creation_time = "" # Override the timestamp taken from the run (format '2023-03-30 15:04:31')
cal_db_interface = "tcp://max-exfl016:8015#8025" # the database interface to use
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
capacitor_setting = 5 # capacitor_setting for which data was taken
bias_voltage_0 = -1 # bias voltage for minis 1, 3, 5, 7; Setting -1 will read the value from files
bias_voltage_1 = -1 # bias voltage for minis 2, 4, 6, 8; Setting -1 will read the value from files
thresholds_offset_sigma = 3. # bad pixel relative threshold in terms of n sigma offset
thresholds_offset_hard = [400, 1500] # bad pixel hard threshold
thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise
thresholds_noise_hard = [1, 35] # bad pixel hard threshold
skip_first_ntrains = 10 # Number of first trains to skip
ntrains = 500 # number of trains to use
min_trains = 370 # minimum number of trains needed for each gain stage
high_res_badpix_3d = False # plot bad-pixel summary in high resolution
test_for_normality = False # perform normality test
inject_cell_order = False # Include memory cell order as part of the detector condition
operation_mode = "" # This in given as a default input argument through the webservice in production. Don't remove!
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import os
import warnings
warnings.filterwarnings('ignore')
import matplotlib
import pasha as psh
import scipy.stats
from IPython.display import Latex, Markdown, display
matplotlib.use("agg")
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from extra_data import RunDirectory
from iCalibrationDB import Conditions, Constants
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
from cal_tools.calcat_interface import CalCatApi
from cal_tools.enums import BadPixels
from cal_tools.plotting import plot_badpix_3d
from cal_tools.restful_config import calibration_client
from cal_tools.tools import (
calcat_creation_time,
get_from_db,
get_report,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
```
%% Cell type:code id: tags:
``` python
mem_cells = 512
gain_names = ['High', 'Medium', 'Low']
const_shape = (mem_cells, 32, 256, 3) # cells, slow_scan, fast_scan, gain
gain_runs = {}
if capacitor_setting == 5:
gain_runs["high_5pf"] = run_high
gain_runs["med_5pf"] = run_med
gain_runs["low_5pf"] = run_low
elif capacitor_setting == 50:
gain_runs["high_50pf"] = run_high
gain_runs["med_50pf"] = run_med
gain_runs["low_50pf"] = run_low
capacitor_settings = [capacitor_setting]
capacitor_settings = ['{}pf'.format(c) for c in capacitor_settings]
creation_time = calcat_creation_time(in_folder, run_high, creation_time)
print(f"Using {creation_time} as creation time")
source_name = source_name.format(karabo_id)
control_source_name = control_source_name.format(karabo_id)
if -1 in {bias_voltage_0, bias_voltage_1}:
run_data = RunDirectory(os.path.join(in_folder, f"r{run_high:04d}"))
if bias_voltage_0 == -1:
bias_voltage_0 = run_data[control_source_name, 'sensorBiasVoltage0'].as_single_value(atol=5.)
if bias_voltage_1 == -1:
bias_voltage_1 = run_data[control_source_name, 'sensorBiasVoltage1'].as_single_value(atol=5.)
run, prop, seq = run_prop_seq_from_path(in_folder)
display(Markdown('## Evaluated parameters'))
print(f'CalDB Interface {cal_db_interface}')
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}")
print(f"Runs: {run_high}, {run_med}, {run_low}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage_0}V & {bias_voltage_1}V")
```
%% Cell type:code id: tags:
``` python
calcat = CalCatApi(client=calibration_client())
detector_id = calcat.detector(karabo_id)['id']
pdus_by_da = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
pdu_name_by_da = {da: p['physical_name'] for (da, p) in pdus_by_da.items()}
if karabo_da and karabo_da != ['']:
karabo_da = [da for da in karabo_da if da in pdu_name_by_da]
pdu_name_by_da = {da: pdu_name_by_da[da] for da in karabo_da}
else:
karabo_da = sorted(pdu_name_by_da.keys())
modules = [int(x.split('/')[-1]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
print("Minis in use (1-8) and PDUs:")
for mod_num, karabo_da_m in zip(modules, karabo_da):
print(f"Mini {mod_num} -> {pdu_name_by_da[karabo_da_m]}")
```
%% Cell type:markdown id: tags:
## Data processing
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
# the actual characterization
def characterize_detector(run_path, gg):
run = RunDirectory(run_path, parallelize=False)
det_source = source_name.format(karabo_id)
data = run[det_source, 'image.data'].drop_empty_trains()
data = data[skip_first_ntrains : skip_first_ntrains + ntrains]
cell_ids = run[det_source, 'image.cellId'].drop_empty_trains()
cell_ids = cell_ids[skip_first_ntrains : skip_first_ntrains + ntrains]
if len(data.train_ids) < min_trains:
raise Exception(f"Run {run_path} only contains {len(data.train_ids)} trains, but {min_ntrains} required")
im = data.ndarray()
if im.ndim > 3:
im = im[:, 0] # Drop extra dimension
cellid = cell_ids.ndarray()
cellid_pattern = cell_ids[0].ndarray()
if cellid.ndim > 1:
cellid = cellid[:, 0]
cellid_pattern = cellid_pattern[:, 0]
# Mask off gain bits, leaving only data
im &= 0b0000111111111111
im = im.astype(np.float32)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
# Results here should have shape (512, [<= 256], 256)
offset = context.alloc(shape=(mem_cells,) + im.shape[1:], dtype=np.float64)
noise = context.alloc(like=offset)
normal_test = context.alloc(like=offset)
def process_cell(worker_id, array_index, cc):
idx = cellid == cc
im_slice = im[idx]
if np.any(idx):
offset[cc] = np.median(im_slice, axis=0)
noise[cc] = np.std(im_slice, axis=0)
if test_for_normality:
_, normal_test[cc] = scipy.stats.normaltest(im_slice, axis=0)
context.map(process_cell, np.unique(cellid))
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(1, 2)).reshape(-1, 1, 1) # add some axes to make broadcasting work
offset_std = np.nanstd(offset, axis=(1, 2)).reshape(-1, 1, 1)
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (
offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(1, 2)).reshape(-1, 1, 1) # add some axes to make broadcasting work
noise_std = np.nanstd(noise, axis=(1, 2)).reshape(-1, 1, 1)
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (
noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
idx = (cellid == cellid[0])
return offset, noise, gg, bp, im[idx, 12::32, 12], normal_test, cellid_pattern
```
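The raw pixel words carry the gain information in their upper bits (as noted in the comment above), so the bit mask keeps only the 12-bit ADC value. A tiny sketch of the masking with a made-up raw word:
``` python
import numpy as np

# 0b0000111111111111 == 0x0FFF: keep the lower 12 bits, drop the gain bits.
raw = np.array([0x3456], dtype=np.uint16)  # made-up raw word
adc = raw & 0x0FFF
print(hex(int(adc[0])))  # 0x456
```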
%% Cell type:code id: tags:
``` python
def modno_to_slice(m):
return slice((m-1) * 32, m * 32)
```
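Each mini module covers a 32-row band of the (up to) 256-row frame, and `modno_to_slice` maps the 1-based module number onto those rows, for example:
``` python
# Module numbering is 1-based: module 1 -> rows 0..31, module 8 -> rows 224..255.
print(modno_to_slice(1))  # slice(0, 32)
print(modno_to_slice(8))  # slice(224, 256)
```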
%% Cell type:code id: tags:
``` python
offset_consts = {m: np.zeros(const_shape, dtype=np.float64) for m in modules}
noise_consts = {m: np.zeros(const_shape, dtype=np.float64) for m in modules}
badpix_consts = {m: np.zeros(const_shape, dtype=np.uint32) for m in modules}
normal_tests = {m: np.zeros(const_shape, dtype=np.float64) for m in modules}
data_samples = {m: np.full((ntrains, 3), np.nan) for m in modules} # pixel (12, 12) in first frame of each train
# Should be the same cell order for all modules & all gain stages
cellid_pattern_prev = None
gg = 0
inp = []
for gain_i, (gain, run_num) in enumerate(gain_runs.items()):
run_path = os.path.join(in_folder, f"r{run_num:04d}")
print("Process run: ", run_path)
inp.append((run_path, gain_i))
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(characterize_detector, inp)
for ir, r in enumerate(results):
offset, noise, gg, bp, data, normal, cellid_pattern = r
if cellid_pattern_prev is not None and not np.array_equal(cellid_pattern, cellid_pattern_prev):
raise ValueError("Inconsistent cell ID pattern between gain stages")
cellid_pattern_prev = cellid_pattern
# Split results up by module
for m in modules:
mod_slice = modno_to_slice(m)
offset_consts[m][..., gg] = offset[:, mod_slice]
noise_consts[m][..., gg] = noise[:, mod_slice]
badpix_consts[m][..., gg] = bp[:, mod_slice]
        data_samples[m][..., gg][:data.shape[0], ...] = data[:, m - 1]
normal_tests[m][..., gg] = normal[:, mod_slice]
print(f"{gain_names[gg]} gain. "
f"Number of processed trains per cell: {data.shape[0]}.")
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
report = get_report(metadata_folder)
```
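With the default parameters of this notebook, the resulting raw-data location string looks like the following (shown only to illustrate the format; the proposal is taken from the second-to-last component of `in_folder`):
``` python
# Illustration only, using the default in_folder and run numbers from above.
example_in_folder = "/gpfs/exfel/exp/FXE/202231/p900316/raw/"
example_proposal = list(filter(None, example_in_folder.strip('/').split('/')))[-2]
print('proposal:{} runs:{} {} {}'.format(example_proposal, 12, 11, 10))
# -> proposal:p900316 runs:12 11 10
```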
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise", "BadPixelsDark"]
old_const = {}
old_mdata = {}
print('Retrieve pre-existing constants for comparison.')
cap = capacitor_settings[0]
for mod_num, karabo_da_m in zip(modules, karabo_da):
db_module = pdu_name_by_da[karabo_da_m]
old_const[mod_num] = {}
old_mdata[mod_num] = {}
if inject_cell_order:
mem_cell_order = ",".join([str(c) for c in cellid_pattern]) + ","
else:
mem_cell_order = None
# mod_num is from 1 to 8, so b_v_0 applies to odd numbers
bias_voltage = bias_voltage_0 if (mod_num % 2 == 1) else bias_voltage_1
condition = Conditions.Dark.LPD(
memory_cells=mem_cells,
pixels_x=256,
pixels_y=32,
bias_voltage=bias_voltage,
capacitor=cap,
memory_cell_order=mem_cell_order,
)
for const in clist:
constant = getattr(Constants.LPD, const)()
data, mdata = get_from_db(karabo_id, karabo_da_m,
constant,
condition, None,
cal_db_interface,
creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[mod_num][const] = data
if mdata is None or data is None:
old_mdata[mod_num][const] = {
"timestamp": "Not found",
"filepath": None,
"h5path": None
}
else:
old_mdata[mod_num][const] = {
"timestamp": mdata.calibration_constant_version.begin_at.isoformat(),
"filepath": os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
),
"h5path": mdata.calibration_constant_version.h5path,
}
with open(f"{out_folder}/module_metadata_{mod_num}.yml","w") as fd:
yaml.safe_dump(
{
"module": mod_num,
"pdu": db_module,
"old-constants": old_mdata[mod_num]
}, fd)
```
%% Cell type:code id: tags:
``` python
const_types = {
'Offset': offset_consts,
'Noise': noise_consts,
'BadPixelsDark': badpix_consts,
}
```
%% Cell type:code id: tags:
``` python
# Save constants in the calibration DB
md = None
cap = capacitor_settings[0]
for mod_num, karabo_da_m in zip(modules, karabo_da):
db_module = pdu_name_by_da[karabo_da_m]
print(f"Storing constants for PDU {db_module}")
if inject_cell_order:
mem_cell_order = ",".join([str(c) for c in cellid_pattern]) + ","
else:
mem_cell_order = None
# Do not store empty constants
# In case of 0 trains data_g is initiated with nans and never refilled.
if np.count_nonzero(~np.isnan(data_samples[mod_num]))==0:
print(f"Constant ({cap}, {mod_num}) would be empty, skipping saving")
continue
for const_name, const_dict in const_types.items():
dconst = getattr(Constants.LPD, const_name)()
dconst.data = const_dict[mod_num]
# mod_num is from 1 to 8, so b_v_0 applies to odd numbers
bias_voltage = bias_voltage_0 if (mod_num % 2 == 1) else bias_voltage_1
# set the operating condition
condition = Conditions.Dark.LPD(
memory_cells=mem_cells,
pixels_x=256,
pixels_y=32,
bias_voltage=bias_voltage,
capacitor=cap,
memory_cell_order=mem_cell_order,
)
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition,
file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition,
dconst.data, file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const_name} is stored locally.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {mem_cells}\n"
f"• bias_voltage: {bias_voltage}\n"
f"• capacitor: {cap}\n"
f"• memory cell order: {mem_cell_order}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Raw pedestal distribution ##
Distribution of the pedestal (in ADU) over trains for pixel (12, 12), memory cell 12. The median of the distribution is shown in yellow and one standard deviation in red. The green line shows the average over all pixels for the given memory cell and gain stage.
%% Cell type:code id: tags:
``` python
for i in modules:
fig, grid = plt.subplots(3, 1, sharex="col", sharey="row", figsize=(10, 7))
fig.suptitle(f"Module {i}")
fig.subplots_adjust(wspace=0, hspace=0)
if np.count_nonzero(~np.isnan(data_samples[i])) == 0:
break
for gain in range(3):
data = data_samples[i][:, gain]
offset = np.nanmedian(data)
noise = np.nanstd(data)
xrange = [np.nanmin(data_samples[i]), np.nanmax(data_samples[i])]
if xrange[1] == xrange[0]:
xrange = [0, xrange[0]+xrange[0]//2]
nbins = data_samples[i].shape[0]
else:
nbins = int(xrange[1] - xrange[0])
hn, cn = np.histogram(data, bins=nbins, range=xrange)
grid[gain].hist(data, range=xrange, bins=nbins)
grid[gain].plot([offset-noise, offset-noise], [0, np.nanmax(hn)],
linewidth=1.5, color='red',
label='1 $\sigma$ deviation')
grid[gain].plot([offset+noise, offset+noise],
[0, np.nanmax(hn)], linewidth=1.5, color='red')
grid[gain].plot([offset, offset], [0, 0],
linewidth=1.5, color='y', label='median')
grid[gain].plot([np.nanmedian(offset_consts[i][:, :, 12, gain]),
np.nanmedian(offset_consts[i][:, :, 12, gain])],
[0, np.nanmax(hn)], linewidth=1.5, color='green',
label='average over pixels')
grid[gain].set_xlim(xrange)
grid[gain].set_ylim(0, np.nanmax(hn)*1.1)
grid[gain].set_xlabel("Offset value [ADU]")
grid[gain].set_ylabel("# of occurance")
if gain == 0:
leg = grid[gain].legend(
loc='upper center', ncol=3,
bbox_to_anchor=(0.1, 0.25, 0.7, 1.0))
grid[gain].text(xrange[0], np.nanmax(hn)*0.4,
"{} gain".format(gain_names[gain]), fontsize=20)
a = plt.axes([.125, .1, 0.775, .8], frame_on=False)
a.patch.set_alpha(0.05)
a.set_xlim(xrange)
plt.plot([offset, offset], [0, 1], linewidth=1.5, color='y')
plt.xticks([])
plt.yticks([])
ypos = 0.9
x1pos = (np.nanmedian(data_samples[i][:, 0]) +
np.nanmedian(data_samples[i][:, 2]))/2.
x2pos = (np.nanmedian(data_samples[i][:, 2]) +
np.nanmedian(data_samples[i][:, 1]))/2.-10
plt.annotate("", xy=(np.nanmedian(data_samples[i][:, 0]), ypos), xycoords='data',
xytext=(np.nanmedian(data_samples[i][:, 2]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_samples[i][:, 0])-np.nanmedian(data_samples[i][:, 2])),
xy=(x1pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.annotate("", xy=(np.nanmedian(data_samples[i][:, 2]), ypos), xycoords='data',
xytext=(np.nanmedian(data_samples[i][:, 1]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_samples[i][:, 2])-np.nanmedian(data_samples[i][:, 1])),
xy=(x2pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.show()
```
%% Cell type:code id: tags:
``` python
if not test_for_normality:
print('Normality test was not requested. Flag `test_for_normality` False')
else:
for i in modules:
data = np.copy(normal_tests[i])
data[badpix_consts[i] > 0] = 1.01
hn,cn = np.histogram(data[:,:,:,0], bins=100)
d = [{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,0], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'High gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,1], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Medium gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,2], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Low gain',
},
]
fig = plt.figure(figsize=(15,15), tight_layout={'pad': 0.5, 'w_pad': 0.3})
for gain in range(3):
ax = fig.add_subplot(2, 2, 1+gain)
heatmapPlot(data[:,:,12,gain], add_panels=False, cmap='viridis', figsize=(10,10),
y_label='Rows', x_label='Columns',
lut_label='p-Value',
use_axis=ax,
title='p-Value for cell 12, {} gain'.format(gain_names[gain]) )
ax = fig.add_subplot(2, 2, 4)
_ = simplePlot(d, #aspect=1.6,
x_label = "p-Value".format(gain),
y_label="# of occurance",
use_axis=ax,
y_log=False, legend='outside-top-ncol3-frame', legend_pad=0.05, legend_size='5%')
ax.ticklabel_format(style='sci', axis='y', scilimits=(4,6))
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews help to identify effects that are common to all memory cells, e.g. at the sensor level. They also serve as a first sanity check of expected behaviour, e.g. that structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:code id: tags:
``` python
for gain in range(3):
display(
Markdown('### Cell-12 overview - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(18, 22) , tight_layout={'pad': 0.1, 'w_pad': 0.1})
for i in modules:
for iconst, const in enumerate(['Offset', 'Noise', 'BadPixelsDark']):
ax = fig.add_subplot(3, 2, 1+iconst)
data = const_types[const][i][12, :, :, gain]
vmax = 1.5 * np.nanmedian(data)
            label = f'{const} value [ADU]'
            title = f'{const} value'
if const == 'BadPixelsDark':
vmax = 4
bpix_code = data.astype(np.float32)
bpix_code[bpix_code == 0] = np.nan
title = 'Bad pixel code'
label = title
cb_labels = ['1 {}'.format(BadPixels.NOISE_OUT_OF_THRESHOLD.name),
'2 {}'.format(BadPixels.OFFSET_NOISE_EVAL_ERROR.name),
'3 {}'.format(BadPixels.OFFSET_OUT_OF_THRESHOLD.name),
'4 {}'.format('MIXED')]
heatmapPlot(bpix_code, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label='', vmax=vmax, aspect=1.,
use_axis=ax, cb_ticklabels=cb_labels, cb_ticks = np.arange(4)+1, cb_loc='bottom',
title=title)
del bpix_code
else:
heatmapPlot(data, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label=label, vmax=vmax, aspect=1.,
use_axis=ax, cb_loc='bottom',
title=title)
for i in modules:
for iconst, const in enumerate(['Offset', 'Noise']):
data = const_types[const][i]
dataBP = np.copy(data)
dataBP[badpix_consts[i] > 0] = -1
x_ranges = [[0, 1500], [0, 40]]
hn, cn = np.histogram(
data[:, :, :, gain], bins=100, range=x_ranges[iconst])
hnBP, cnBP = np.histogram(dataBP[:, :, :, gain], bins=cn)
d = [{'x': cn[:-1],
'y': hn,
'drawstyle': 'steps-pre',
'label': 'All data',
},
{'x': cnBP[:-1],
'y': hnBP,
'drawstyle': 'steps-pre',
'label': 'Bad pixels masked',
},
]
ax = fig.add_subplot(3, 2, 5+iconst)
_ = simplePlot(d, figsize=(5, 7), aspect=1,
x_label=f"{const} value [ADU]",
y_label="# of occurence",
title='', legend_pad=0.1, legend_size='10%',
use_axis=ax,
y_log=True, legend='outside-top-2col-frame')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots shows the results of a bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixels values in x/y are re-binned with a factor of 2.
This excludes single bad pixels present only in disconnected pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# Switch rebin to 1 for full resolution and
# no interpolation for badpixel values.
rebin = 2
for gain in range(3):
display(Markdown('### Bad pixel behaviour - {} gain ###'.format(gain_names[gain])))
for mod, data in badpix_consts.items():
plot_badpix_3d(data[...,gain], cols, title='', rebin_fac=rebin)
ax = plt.gca()
leg = ax.get_legend()
leg.set(alpha=0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary across tiles ##
Plots give an overview of calibration constants averaged across tiles. A bad pixel mask is applied. Constants are compared with pre-existing constants retrieved from the calibration database. The difference $\Delta$ between the old and new constants is shown.
%% Cell type:code id: tags:
``` python
time_summary = []
time_summary.append(f"The following pre-existing constants are used for comparison:")
for mod_num, mod_data in old_mdata.items():
time_summary.append(f"- Module {mod_num}")
for const, const_data in mod_data.items():
time_summary.append(f" - {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Summary across tiles - {} gain'.format(gain_names[gain])))
for const in const_types:
data = np.copy(const_types[const][i][:, :, :, gain])
label = 'Fraction of bad pixels'
if const != 'BadPixelsDark':
data[badpix_consts[i][:, :, :, gain] > 0] = np.nan
label = '{} value [ADU]'.format(const)
else:
data[data>0] = 1.0
# Split tiles, making shape (mem_cells, slow_scan, tiles(=2), fast_scan)
data = data.reshape(data.shape[0], 32, 2, 128)
# Average within each tile
data = np.nanmean(data, axis=(1, 3))
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(1, 2, 1)
_ = heatmapPlot(data[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(2)+1,
x_ticks=np.arange(2)+0.5)
if old_const[i][const] is not None:
ax = fig.add_subplot(1, 2, 2)
dataold = np.copy(old_const[i][const][:, :, :, gain])
label = '$\Delta$ {}'.format(label)
if const != 'BadPixelsDark':
if old_const[i]['BadPixelsDark'] is not None:
dataold[old_const[i]['BadPixelsDark'][:, :, :, gain] > 0] = np.nan
else:
dataold[:] = np.nan
else:
dataold[dataold>0]=1.0
dataold = dataold.reshape(dataold.shape[0], 32, 2, 128)
dataold = np.nanmean(dataold, axis=(1, 3))
dataold = dataold - data
_ = heatmapPlot(dataold[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(2)+1,
x_ticks=np.arange(2)+0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Variation of offset and noise across Tiles and ASICs ##
The following plots show the standard deviation $\sigma$ of the calibration constants. The plot of the standard deviation across tiles shows the pixels of one tile ($128 \times 32$); the value of each pixel is the standard deviation across the tiles of the module. The standard deviation across ASICs is shown over all tiles: the plot shows the pixels of one ASIC ($16 \times 32$), where each value is the standard deviation across all ASICs of the module.
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Variation of offset and noise across ASICs - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(const_types[const][i][:, :, :, gain])
data[badpix_consts[i][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataA = np.nanmean(data, axis=0) # average over cells
dataA = dataA.reshape(32, 16, 16)
dataA = np.nanstd(dataA, axis=1) # average across ASICs
ax = fig.add_subplot(1, 2, 1+iconst)
_ = heatmapPlot(dataA, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis'
)
plt.show()
```
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Variation of offset and noise across tiles - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(const_types[const][i][:, :, :, gain])
data[badpix_consts[i][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataT = data.reshape(data.shape[0], 32, 2, 128)
dataT = np.nanstd(dataT, axis=2)
dataT = np.nanmean(dataT, axis=0)
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataT, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Aggregate values and per cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behavior, averaged across pixels.
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Mean over pixels - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(9,11))
for iconst, const in enumerate(const_types):
ax = fig.add_subplot(3, 1, 1+iconst)
data = const_types[const][i][:510,:,:,gain]
if const == 'BadPixelsDark':
data[data>0] = 1.0
dataBP = np.copy(data)
dataBP[badpix_consts[i][:510,:,:,gain] > 0] = -10
data = np.nanmean(data, axis=(1, 2))
dataBP = np.nanmean(dataBP, axis=(1, 2))
d = [{'y': data,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'All data'
}
]
if const != 'BadPixelsDark':
d.append({'y': dataBP,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'good pixels only'
})
y_title = f"{const} value [ADU]"
title = f"{const} value, {gain_names[gain]} gain"
else:
y_title = "Fraction of Bad Pixels"
title = f"Fraction of Bad Pixels, {gain_names[gain]} gain"
data_min = np.min([data, dataBP]) if const != 'BadPixelsDark' else np.min([data])
data_max = np.max([data[20:], dataBP[20:]])
data_dif = data_max - data_min
local_max = np.max([data[200:300], dataBP[200:300]])
frac = 0.35
new_max = (local_max - data_min*(1-frac))/frac
new_max = np.max([data_max, new_max])
_ = simplePlot(d, figsize=(10,10), aspect=2, xrange=(-12, 510),
x_label = 'Memory Cell ID',
y_label=y_title, use_axis=ax,
title=title,
title_position=[0.5, 1.15],
inset='xy-coord-right', inset_x_range=(0,20), inset_indicated=True,
inset_labeled=True, inset_coord=[0.2,0.5,0.6,0.95],
inset_lw = 1.0, y_range = [data_min-data_dif*0.05, new_max+data_dif*0.05],
y_log=False, legend='outside-top-ncol2-frame', legend_size='18%',
legend_pad=0.00)
plt.tight_layout(pad=1.08, h_pad=0.35)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
table = []
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR]
for mod_num in modules:
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_consts[mod_num][:,:,:,gain])
l_data.append(len(data[data>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_consts[mod_num][:,:,:,gain] & bit.value))
if old_const[mod_num]['BadPixelsDark'] is not None:
old_const[mod_num]['BadPixelsDark'] = old_const[mod_num]['BadPixelsDark'].astype(np.uint32)
dataold = np.copy(old_const[mod_num]['BadPixelsDark'][:, :, :, gain])
l_data_old.append(len(dataold[dataold>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[mod_num]['BadPixelsDark'][:, :, :, gain] & bit.value))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma}', f'{thresholds_offset_sigma}',
f'{thresholds_offset_hard}/{thresholds_noise_hard}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, gain {gain_names[gain]}', l_threshold[i], l_data[i]]
if old_const[mod_num]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels ###
One pixel can be bad for several reasons at once; therefore, the sum over all bad-pixel types can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
for mod_num in modules:
data = np.copy(const_types[const][mod_num])
data[badpix_consts[mod_num] > 0] = np.nan
if old_const[mod_num][const] is not None and old_const[mod_num]['BadPixelsDark'] is not None :
dataold = np.copy(old_const[mod_num][const])
dataold[old_const[mod_num]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
for gain in range(3):
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[mod_num][const] is not None and old_const[mod_num]['BadPixelsDark'] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
%% Cell type:markdown id: tags:
# Injecting LPD-Mini calibration constant data to the database
Author: European XFEL Detector Group, Version: 1.0
This notebook is used to read HDF5 files with LPDMINI constants to inject into the CALCAT database.
The notebook expects an explicit filename format: <calibration_constant_name>_<karabo_da>.h5 for example `RelativeGain_LPDMINI00_2.h5`
- <calibration_constant_name>: The calibration name as it is named in CALCAT.
- <karabo_da>: The data aggregator name. As LPDMini aggregators have `/` we replace it with `_`.
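Because the aggregator names contain a slash, the expected file name is obtained by substituting it with an underscore, matching how the notebook later looks the files up. A minimal sketch:
``` python
# Sketch: build the expected constant file name for one aggregator,
# using the example names given above.
constant_name = "RelativeGain"
karabo_da_entry = "LPDMINI00/2"
print(f"{constant_name}_{karabo_da_entry.replace('/', '_')}.h5")
# -> RelativeGain_LPDMINI00_2.h5
```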
%% Cell type:code id: tags:
``` python
# calibration constant parameters:
constant_names = [""] # calibration constant name, required.
in_folder = "" # calibration constants folder, required.
out_folder = "" # output folder to store report path in case the notebook is executed by CLI, required.
proposal = "" # Add proposal number to be sent to the database as a part of Raw data location.
runs = [""] # Add list of runs to be sent to the database as a part of Raw data location.
# detector parameters:
karabo_id = "FXE_DET_LPD_MINI" # detector identifier.
karabo_da = [""] # karabo data aggregators. default "all" for all 8 karabo data aggregator names.
# calibration database parameters:
cal_db_interface = "tcp://max-exfl016:8015#8045" # calibration DB zmq address.
# calibration constant conditions:
memory_cells = 512 # Number of memory cells. Used for constant conditions.
bias_voltage_0 = 250 # bias voltage for minis 1, 3, 5, 7
bias_voltage_1 = 250 # bias voltage for minis 2, 4, 6, 8
capacitor = 5 # capacitor value. Used for constant conditions.
photon_energy = 9.2 # calibration constant photon energy. Used for constant conditions.
creation_time = '2023-05-07T15:10:07' # creation time for the injected constants. required format '2019-01-20T14:12:06'
```
%% Cell type:code id: tags:
``` python
import multiprocessing
from datetime import datetime
from logging import warning
from pathlib import Path
from typing import List, Tuple
import h5py
from cal_tools.calcat_interface import CalCatApi
from cal_tools.restful_config import calibration_client
from cal_tools.tools import get_report, send_to_db
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
pixels_x = 256
pixels_y = 32
calcat_client = calibration_client()
calcat = CalCatApi(client=calcat_client)
# Look up PDUs
detector_id = calcat.detector(karabo_id)['id']
pdus_by_da = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
if not karabo_da or karabo_da == [""]:
karabo_da = sorted(pdus_by_da.keys())
else:
karabo_da = karabo_da
# if proposal or runs are given assign file_loc
# for calibration constant versions metadata.
file_loc = ""
if proposal:
file_loc += f"proposal:{proposal}"
if runs[0] != "":
file_loc += f"runs: {runs}"
if file_loc == "":
print(
"No proposal or runs were given for constant source."
" No \"Raw data location\" will be injected with the constants.\n"
)
```
%% Cell type:code id: tags:
``` python
def validate_input_paths(
in_folder: str,
creation_time: str,
constant_names: List[str],
) -> Tuple[str, datetime]:
# Validate input parameters:
if not (in_folder):
raise ValueError(
"ERROR: \"in_folder\" is not given."
" Please provide the constants input folder."
)
c_folder = Path(in_folder)
if not c_folder.is_dir():
raise ValueError(
f"ERROR: in_folder {in_folder} directory doesn't exist."
)
try:
creation_time = datetime.strptime(creation_time, '%Y-%m-%dT%H:%M:%S')
except ValueError:
raise ValueError(
"Incorrect data format, "
"should be YYYY-MM-DDTHH:MM:SS i.e. 2019-01-20T14:12:06"
)
for constant in constant_names:
if not hasattr(Constants.LPD, constant):
raise ValueError(
f"ERROR: Constant name \"{constant}\" is not a known LPD constant. "
f"Available LPD Constants are {[c for c in dir(Constants.LPD) if c[0] != '_']}"
)
return c_folder, creation_time
```
%% Cell type:code id: tags:
``` python
def inject_constants(
constant_name: str,
mod_da: str,
physical_unit: str,
):
mod_num = int(mod_da.split('/')[-1])
# mod_num is from 1 to 8, so b_v_0 applies to odd numbers
bias_voltage = bias_voltage_0 if mod_num % 2 == 1 else bias_voltage_1
# Calibration constants condition object.
condition = Conditions.Illuminated.LPD(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
photon_energy=photon_energy,
pixels_x=pixels_x,
pixels_y=pixels_y,
capacitor=capacitor,
category=None,
)
constant = getattr(Constants.LPD, constant_name)()
cfile = c_folder / f"{constant_name}_{mod_da.replace('/', '_')}.h5"
if not cfile.exists():
warning(f"Constant file {cfile} doesn't exist.\n")
return
# load constant data.
with h5py.File(cfile, "r") as f:
cdata = f[constant_name][()]
# Validate for only LPD at the moment.
if not cdata.shape == (memory_cells, pixels_y, pixels_x, 3):
raise ValueError(
f"ERROR: {constant_name} constant data shape is not as expected."
f" {cdata.shape} != ({memory_cells}, {pixels_y}, {pixels_x}, 3).\n"
)
constant.data = cdata
send_to_db(
db_module=physical_unit,
karabo_id=karabo_id,
constant=constant,
condition=condition,
file_loc=file_loc,
report_path=report_path,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
)
```
%% Cell type:code id: tags:
``` python
c_folder, creation_time = validate_input_paths(
in_folder,
creation_time,
constant_names,
)
# create a report path for calibration constant versions metadata.
report_name = f"No_report/LPD_{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}"
report_path = get_report(
out_folder=in_folder,
default_path=report_name
)
mod_mapping = {mod: pdus_by_da[mod]["physical_name"] for mod in karabo_da}
print("Physical detector units retrieved are: ", mod_mapping, "\n")
inp = []
for const in constant_names:
for k_da, pdu in mod_mapping.items():
inp.append((const, k_da, pdu))
with multiprocessing.Pool(processes=5) as pool:
results = pool.starmap(inject_constants, inp)
```
%% Cell type:markdown id: tags:
# ePix100 Dark Characterization
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides dark image analysis and calibration constants of the ePix100 detector.
Dark characterization evaluates offset and noise of the detector and gives information about bad pixels.
Noise and bad pixels maps are calculated independently for each of the 4 ASICs of ePix100, since their noise behaviour can be significantly different.
Common mode correction can be applied to increase sensitivity to noise-related bad pixels. Common mode correction is achieved by subtracting the median of all pixels that are read out at the same time along a row/column. This is done in an iterative process, in which a new bad pixels map is calculated and used to mask the data while the common mode values across the rows/columns are updated.
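As a rough sketch of this row/column median subtraction (the notebook itself uses `xcal.CommonModeCorrection` per readout bank further below, so this is illustration only, with hypothetical array shapes):
``` python
import numpy as np

# Hypothetical stack of dark frames (n_frames, rows, cols); bad pixels are
# assumed to be already masked with NaN so they do not bias the medians.
frames = np.random.normal(100, 5, size=(10, 708, 768)).astype(np.float32)

# Subtract the median of the pixels read out together along each row,
# then along each column, ignoring NaN entries.
frames -= np.nanmedian(frames, axis=2, keepdims=True)  # row-wise common mode
frames -= np.nanmedian(frames, axis=1, keepdims=True)  # column-wise common mode

# The noise map is then recalculated from the corrected frames.
noise_map = np.nanstd(frames, axis=0)
```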
The resulting maps are saved as .h5 files for later use and injected into the calibration DB.
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/HED/202330/p900338/raw' # input folder, required
out_folder = '' # output folder, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequence = 0 # sequence file to use
run = 176 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = ["EPIX01"] # data aggregators
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters for the calibration database.
use_dir_creation_date = True
cal_db_interface = "tcp://max-exfl016:8020" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # Output constants to the calibration database
local_output = True # Output constants locally
# Conditions used for injected calibration constants.
bias_voltage = 200 # Bias voltage
in_vacuum = False # Detector operated in vacuum
fix_integration_time = -1 # Integration time. Set to -1 to read from .h5 file
fix_temperature = -1 # Fixed temperature in Kelvin. Set to -1 to read from .h5 file
temp_limits = 5 # Limit for parameter Operational temperature
badpixel_noise_sigma = 5 # Bad pixels defined by noise value outside n * std from median. Default: 5
badpixel_offset_sigma = 2 # Bad pixels defined by offset value outside n * std from median. Default: 2
CM_N_iterations = 2 # Number of iterations for common mode correction. Set to 0 to skip it
# Parameters used during selecting raw data trains.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
max_trains = 1000 # Maximum number of trains to use for processing dark constants. Set to 0 to use all available trains.
# Don't delete! myMDC sends this by default.
operation_mode = '' # Detector operation mode, optional
# TODO: delete after removing from calibration_configurations
db_module = '' # ID of module in calibration database; this parameter is ignored in the notebook. TODO: remove from calibration_configurations.
```
%% Cell type:code id: tags:
``` python
import os
import warnings
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, Markdown
from prettytable import PrettyTable
from extra_data import RunDirectory
from pathlib import Path
import XFELDetAna.xfelprofiler as xprof
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from XFELDetAna.plotting.util import prettyPlotting
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.epix100 import epix100lib
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
warnings.filterwarnings('ignore')
prettyPlotting = True
profiler = xprof.Profiler()
profiler.disable()
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = f'proposal:{proposal} runs:{run}'
report = get_report(metadata_folder)
ped_dir = os.path.join(in_folder, f"r{run:04d}")
fp_name = path_template.format(run, karabo_da[0]).format(sequence)
run_dir = RunDirectory(ped_dir)
print(f"Reading from: {Path(ped_dir) / fp_name}")
print(f"Run number: {run}")
print(f"Instrument H5File source: {instrument_src}")
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time.isoformat()} as creation time")
os.makedirs(out_folder, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Read sensor size
sensor_size = run_dir[instrument_src, 'data.image.dims'].as_single_value(reduce_by='first') # (x=768, y=708) expected
sensor_size = sensor_size[sensor_size != 1] # data.image.dims for old data is [768, 708, 1]
assert np.array_equal(sensor_size, [768, 708]), 'Unexpected sensor dimensions.'
# Path to pixels ADC values
pixels_src = (instrument_src, "data.image.pixels")
# Specifies total number of images to process
n_trains = run_dir.get_data_counts(*pixels_src).shape[0]
# Modify n_trains to process based on given maximum
# and minimum number of trains.
if max_trains:
n_trains = min(max_trains, n_trains)
if n_trains < min_trains:
raise ValueError(
f"Less than {min_trains} trains are available in RAW data."
" Not enough data to process darks.")
print(f"Number of dark images to analyze: {n_trains}")
```
%% Cell type:code id: tags:
``` python
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dir,
instrument_src=instrument_src,
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
# Read integration time
if fix_integration_time == -1:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ''
else:
integration_time = fix_integration_time
integration_time_str_add = '(manual input)'
# Read temperature
if fix_temperature == -1:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ''
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = '(manual input)'
# Print operating conditions
print(f"Bias voltage: {bias_voltage} V")
print(f"Detector integration time: {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Passing repetitive code along the notebook to a function
def stats_calc(data):
'''
Calculates basic statistical parameters of the input data:
mean, standard deviation, median, minimum and maximum.
Returns dictionary with keys: 'mean', 'std', 'median',
'min', 'max' and 'legend'.
Parameters
----------
data : ndarray
Data to analyse.
'''
stats = {}
stats['mean'] = np.nanmean(data)
stats['std'] = np.nanstd(data)
stats['median'] = np.nanmedian(data)
stats['min'] = np.nanmin(data)
stats['max'] = np.nanmax(data)
stats['legend'] = ('mean: ' + str(np.round(stats['mean'],2)) +
'\nstd: ' + str(np.round(stats['std'],2)) +
'\nmedian: ' + str(np.round(stats['median'],2)) +
'\nmin: ' + str(np.round(stats['min'],2)) +
'\nmax: ' + str(np.round(stats['max'],2)))
return stats
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
step_timer.start()
# Read data
data_dc = run_dir.select(
*pixels_src, require_all=True).select_trains(np.s_[:n_trains])
data = data_dc[pixels_src].ndarray()
# Instantiate constant maps to be filled with offset, noise and bad pixels maps
constant_maps = {}
step_timer.done_step('Darks loaded. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Offset Map
%% Cell type:code id: tags:
``` python
step_timer.start()
# Calculate offset per pixel and store it in constant_maps
constant_maps['Offset'] = np.nanmean(data, axis=0)[..., np.newaxis]
# Calculate basic statistical parameters
stats = stats_calc(constant_maps['Offset'].flatten())
# Offset map
fig = xana.heatmapPlot(
constant_maps['Offset'].squeeze(),
lut_label='[ADU]',
x_label='Column',
y_label='Row',
vmin=max(0, np.round((stats['median']-stats['std'])/250)*250), # Force cb label to be multiple of 250 for reproducibility
vmax=min(np.power(2,14)-1, np.round((stats['median']+stats['std'])/250)*250)
)
fig.suptitle('Offset Map', x=.48, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
# Offset Histogram
h, c = np.histogram(
constant_maps['Offset'].flatten(),
bins = np.arange(stats['min'], stats['max'], stats['std']/100)
)
d = {'x': c[:-1],
'y': h,
'color': 'blue'
}
fig = xana.simplePlot(
d,
aspect=1.5,
x_label='Offset [ADU]',
y_label='Counts',
x_range=(0, np.power(2,14)-1),
y_range=(0, max(h)*1.1),
y_log=True
)
fig.text(
s=stats['legend'],
x=.7,
y=.7,
fontsize=14,
bbox=dict(facecolor='yellow', edgecolor='black', alpha=.1)
)
fig.suptitle('Offset Map Histogram', x=.5,y =.92, fontsize=16)
step_timer.done_step('Offset Map created. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Noise Map
%% Cell type:code id: tags:
``` python
step_timer.start()
# Calculate noise per pixel and store it in constant_maps
constant_maps['Noise'] = np.nanstd(data, axis=0)[..., np.newaxis]
# Calculate basic statistical parameters
stats = stats_calc(constant_maps['Noise'].flatten())
# Noise heat map
fig = xana.heatmapPlot(
constant_maps['Noise'].squeeze(),
lut_label='[ADU]',
x_label='Column',
y_label='Row',
vmin=max(0, np.round((stats['median'] - badpixel_noise_sigma*stats['std']))),
vmax=np.round(stats['median'] + badpixel_noise_sigma*stats['std'])
)
fig.suptitle('Noise Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
# Calculate overall noise histogram
bins = np.arange(max(0, stats['mean'] - badpixel_noise_sigma*stats['std']),
stats['mean'] + badpixel_noise_sigma*stats['std'],
stats['std']/100)
h, c = np.histogram(
constant_maps['Noise'].flatten(),
bins = bins
)
d = [{'x': c[:-1],
'y': h,
'color': 'black',
'label': 'Total'
}]
# Calculate per ASIC histogram
asic = []
asic.append({'noise': constant_maps['Noise'][:int(sensor_size[1]//2), int(sensor_size[0]//2):],
'label': 'ASIC 0 (bottom right)',
'color': 'blue'})
asic.append({'noise': constant_maps['Noise'][int(sensor_size[1]//2):, int(sensor_size[0]//2):],
'label': 'ASIC 1 (top right)',
'color': 'red'})
asic.append({'noise': constant_maps['Noise'][int(sensor_size[1]//2):, :int(sensor_size[0]//2)],
'label': 'ASIC 2 (top left)',
'color': 'green'})
asic.append({'noise': constant_maps['Noise'][:int(sensor_size[1]//2), :int(sensor_size[0]//2)],
'label': 'ASIC 3 (bottom left)',
'color': 'magenta'})
min_std = np.inf
for a in asic:
h, c = np.histogram(a['noise'].flatten(), bins=bins)
d.append({'x': c[:-1], 'y': h, 'color': a['color'], 'label': a['label']})
min_std = np.nanmin((min_std, np.nanstd(a['noise'])))
print(a['label'][:6] +
': median = ' + "{:.2f}".format(np.round(np.nanmedian(a['noise']),2)) +
' | std = ' + "{:.2f}".format(np.round(np.nanstd(a['noise']),2)) +
a['label'][6:])
arg_max_median = 0
for i in range(0,np.size(d)):
arg_max_median = np.max((arg_max_median, np.argmax(d[i]['y'])))
# Plot noise histogram
fig = xana.simplePlot(
d,
aspect=1.5,
x_label='Noise [ADU]',
y_label='Counts',
x_range=(max(0, stats['median'] - badpixel_noise_sigma*stats['std']),
stats['median'] + badpixel_noise_sigma*stats['std']),
y_range=(0, max(d[0]['y'])*1.1),
)
plt.grid(linestyle = ':')
leg = fig.legend(bbox_to_anchor=(.42, .88),fontsize = 14)
fig.text(
s=stats['legend'],
x=.75,
y=.7,
fontsize=14,
bbox=dict(facecolor='yellow', edgecolor='black', alpha=.1)
)
fig.suptitle('Noise Map Histogram',x=.5,y=.92,fontsize=16)
step_timer.done_step('\nNoise Map created. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Bad Pixels
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) against the median value of the respective maps for each ASIC ($v_k$):
$$
v_i > \mathrm{median}(v_{k}) + n \sigma_{v_{k}}
$$
or
$$
v_i < \mathrm{median}(v_{k}) - n \sigma_{v_{k}}
$$
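For illustration, this criterion can be evaluated for a single ASIC block as in the minimal sketch below (synthetic values; the notebook's `eval_bpidx` function applies the same test to each of the four ASICs):
``` python
import numpy as np

n_sigma = 5  # e.g. badpixel_noise_sigma
block = np.random.normal(10, 1, size=(354, 384))  # synthetic noise values of one ASIC
mdn, std = np.nanmedian(block), np.nanstd(block)
bad = (block > mdn + n_sigma * std) | (block < mdn - n_sigma * std)
print(f"{bad.sum()} pixels flagged in this ASIC block")
```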
Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
'''
Prints bad pixels bit encoding.
Parameters
----------
bp : enum 'BadPixels'
'''
print('{:<23s}: {:032b} ({})'.format(bp.name, bp.value, bp.real))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
```
%% Cell type:code id: tags:
``` python
def eval_bpidx(const_map, n_sigma, block_size):
'''
Evaluates bad pixels by comparing the average offset and
noise of each pixel against the median value of the
respective maps of each ASIC.
Returns boolean array.
Parameters
----------
const_map : ndarray
Offset or noise constant map to input.
n_sigma : float
Standard deviation multiplicity interval outside
which bad pixels are defined.
block_size : ndarray
dimensions ([x,y]) of each ASIC.
'''
blocks = {} # Each block corresponds to 1 ASIC
blocks['0'] = const_map[:int(block_size[1]), int(block_size[0]):] # bottom right
blocks['1'] = const_map[int(block_size[1]):, int(block_size[0]):] # top right
blocks['2'] = const_map[int(block_size[1]):, :int(block_size[0])] # top left
blocks['3'] = const_map[:int(block_size[1]), :int(block_size[0])] # bottom left
idx = {}
for b in range(0, len(blocks)):
mdn = np.nanmedian(blocks[str(b)])
std = np.nanstd(blocks[str(b)])
idx[str(b)] = ( (blocks[str(b)] > mdn + n_sigma*std) | (blocks[str(b)] < mdn - n_sigma*std) )
idx_output = np.zeros(const_map.shape, dtype=bool)
idx_output[:int(block_size[1]), int(block_size[0]):] = idx['0']
idx_output[int(block_size[1]):, int(block_size[0]):] = idx['1']
idx_output[int(block_size[1]):, :int(block_size[0])] = idx['2']
idx_output[:int(block_size[1]), :int(block_size[0])] = idx['3']
return idx_output
```
%% Cell type:code id: tags:
``` python
def bp_analysis_table(bad_pixels_map, title = ''):
'''
Prints a table with short analysis of the number and
percentage of bad pixels on count of offset or noise
out of threshold, or evaluation error.
Returns PrettyTable.
Parameters
----------
bad_pixels_map : ndarray
Bad pixel map to analyse.
title : string, optional
Table title to be used.
'''
num_bp = np.sum(bad_pixels_map!=0)
offset_bp = np.sum(bad_pixels_map==1)
noise_bp = np.sum(bad_pixels_map==2)
eval_error_bp = np.sum(bad_pixels_map==4)
t = PrettyTable()
t.field_names = [title]
t.add_row(['Total number of Bad Pixels : {:<5} ({:<.2f}%)'.format(num_bp, 100*num_bp/np.prod(bad_pixels_map.shape))])
t.add_row([' Offset out of threshold : {:<5} ({:<.2f}%)'.format(offset_bp, 100*offset_bp/np.prod(bad_pixels_map.shape))])
t.add_row([' Noise out of threshold : {:<5} ({:<.2f}%)'.format(noise_bp, 100*noise_bp/np.prod(bad_pixels_map.shape))])
t.add_row([' Evaluation error : {:<5} ({:<.2f}%)'.format(eval_error_bp, 100*eval_error_bp/np.prod(bad_pixels_map.shape))])
return t
```
%% Cell type:code id: tags:
``` python
# Add bad pixels from darks to constant_maps
constant_maps['BadPixelsDark'] = np.zeros(constant_maps['Offset'].shape, np.uint32)
# Find noise related bad pixels
constant_maps['BadPixelsDark'][eval_bpidx(constant_maps['Noise'], badpixel_noise_sigma, sensor_size//2)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
constant_maps['BadPixelsDark'][~np.isfinite(constant_maps['Noise'])] = BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Find offset related bad pixels
constant_maps['BadPixelsDark'][eval_bpidx(constant_maps['Offset'], badpixel_offset_sigma, sensor_size//2)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
constant_maps['BadPixelsDark'][~np.isfinite(constant_maps['Offset'])] = BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Plot Bad Pixels Map
fig = xana.heatmapPlot(
np.exp(np.nan_to_num(np.log(constant_maps['BadPixelsDark']), neginf=np.nan)).squeeze(), # convert zeros to NaN
lut_label='Bad pixel value [ADU]', # for plotting purposes
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1])
)
fig.suptitle('Initial Bad Pixels Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
step_timer.done_step('Initial Bad Pixels Map created. Elapsed Time')
print(bp_analysis_table(constant_maps['BadPixelsDark'], title='Initial Bad Pixel Analysis'))
```
%% Cell type:markdown id: tags:
## Common Mode Correction
Common mode correction is applied here to increase sensitivity to bad pixels. This is done in an iterative process. Each iteration is composed of the following steps:
1. Common mode noise is calculated and subtracted from data.
2. Noise map is recalculated.
3. Bad pixels are recalculated based on the new noise map.
4. Data is masked based on the new bad pixels map.
%% Cell type:code id: tags:
``` python
if CM_N_iterations < 1:
print('Common mode correction not applied.')
else:
commonModeBlockSize = (sensor_size//[8,2]).astype(int) # bank size (x=96,y=354) pixels
# Instantiate common mode calculators for column and row CM correction
cmCorrection_col = xcal.CommonModeCorrection(
sensor_size.tolist(),
commonModeBlockSize.tolist(),
'col',
parallel=False)
cmCorrection_row = xcal.CommonModeCorrection(
sensor_size.tolist(),
commonModeBlockSize.tolist(),
'row',
parallel=False)
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
data = data.astype(float) # This conversion is needed for offset subtraction
# Subtract Offset
data -= constant_maps['Offset'].squeeze()
noise_map_corrected = np.copy(constant_maps['Noise'])
bp_offset = [np.sum(constant_maps['BadPixelsDark']==1)]
bp_noise = [np.sum(constant_maps['BadPixelsDark']==2)]
for it in range (0,CM_N_iterations):
step_timer.start()
# Mask bad pixels
BadPixels_mask = np.squeeze(constant_maps['BadPixelsDark'] != 0)
BadPixels_mask = np.repeat(BadPixels_mask[np.newaxis,...],data.shape[0],axis=0)
data[BadPixels_mask] = np.nan
# Common mode correction
data = np.swapaxes(data,0,-1)
data = cmCorrection_col.correct(data)
data = cmCorrection_row.correct(data)
data = np.swapaxes(data,0,-1)
# Update noise map
noise_map_corrected = np.nanstd(data, axis=0)[..., np.newaxis]
# Update bad pixels map
constant_maps['BadPixelsDark'][eval_bpidx(noise_map_corrected, badpixel_noise_sigma, sensor_size//2)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp_offset.append(np.sum(constant_maps['BadPixelsDark']==1))
bp_noise.append(np.sum(constant_maps['BadPixelsDark']==2))
print(bp_analysis_table(constant_maps['BadPixelsDark'], title=f'{it+1} CM correction iterations'))
step_timer.done_step('Elapsed Time')
print('\n')
# Apply final bad pixels mask
BadPixels_mask = np.broadcast_to(np.squeeze(constant_maps['BadPixelsDark'] != 0), data.shape)
data[BadPixels_mask] = np.nan
it = np.arange(0, CM_N_iterations+1)
plt.figure()
plt.plot(it, np.sum((bp_offset,bp_noise),axis=0), c = 'black', ls = '--', marker = 'o', label = 'Total')
plt.plot(it, bp_noise, c = 'red', ls = '--', marker = 'v', label = 'Noise out of threshold')
plt.plot(it, bp_offset, c = 'blue', ls = '--', marker = 's',label = 'Offset out of threshold')
plt.xticks(it)
plt.xlabel('CM correction iteration')
plt.ylabel('# Bad Pixels')
plt.legend()
plt.grid(linestyle = ':')
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
display(Markdown(f'## Common-Mode Corrected Bad Pixels Map\n'))
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
# Plot final bad pixels map
fig = xana.heatmapPlot(
np.exp(np.nan_to_num(np.log(constant_maps['BadPixelsDark']),neginf=np.nan)).squeeze(), # convert zeros to NaN
lut_label='Bad pixel value [ADU]', # for plotting purposes
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1])
)
fig.suptitle('Final Bad Pixels Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
print(bp_analysis_table(constant_maps['BadPixelsDark'], title='Final Bad Pixels Analysis'))
```
%% Cell type:markdown id: tags:
## Calibration Constants DB
Send the dark constants to the database and/or save them locally.
%% Cell type:code id: tags:
``` python
# Save constants to DB
md = None
for const_name in constant_maps.keys():
const = getattr(Constants.ePix100, const_name)()
const.data = constant_maps[const_name].data
# Set the operating condition
condition = Conditions.Dark.ePix100(
bias_voltage=bias_voltage,
integration_time=integration_time,
temperature=temperature_k,
in_vacuum=in_vacuum)
for parm in condition.parameters:
if parm.name == "Sensor Temperature":
parm.lower_deviation = temp_limits
parm.upper_deviation = temp_limits
# Get physical detector unit
db_module = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=const,
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)[0]
# Inject or save calibration constants
if db_output:
md = send_to_db(
db_module=db_module,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout
)
if local_output:
md = save_const_to_h5(
db_module=db_module,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder
)
print(f"Calibration constant {const_name} is stored locally at {out_folder} \n")
print("Constants parameter conditions are:\n"
f"• Bias voltage: {bias_voltage}\n"
f"• Integration time: {integration_time}\n"
f"• Temperature: {temperature_k}\n"
f"• In Vacuum: {in_vacuum}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
......
%% Cell type:markdown id: tags:
# pnCCD Data Correction #
Authors: DET Group, Modified by Kiana Setoodehnia - Version 5.0
The following notebook provides offset, common mode, relative gain, split events and pattern classification corrections of images acquired with the pnCCD. This notebook *does not* yet correct for charge transfer inefficiency.
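The corrections are applied in the order offset, common mode, relative gain and finally split-event/pattern classification. A minimal conceptual sketch of that order is given below; the notebook itself uses the `xcal` calculators and pattern classifiers defined further down, so the helper function and its simplified common-mode estimate here are illustrative assumptions only.
``` python
import numpy as np

def correct_frame_sketch(raw, offset, relative_gain, bad_pixels):
    """Illustrative correction order only, not the notebook's implementation."""
    d = raw.astype(np.float32)
    d[bad_pixels != 0] = np.nan    # ignore known bad pixels
    d -= offset                    # offset correction
    d -= np.nanmedian(d, axis=0)   # crude stand-in for the common mode correction
    d /= relative_gain             # relative gain correction
    return d                       # split-event / pattern classification would follow
```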
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SQS/202031/p900166/raw" # input folder
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/pnccd_correct" # output folder
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 347 # which run to read data from
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequences running on the same slurm node.
karabo_da = 'PNCCD01' # data aggregators
karabo_id = "SQS_NQS_PNCCD1MP" # karabo prefix of PNCCD devices
receiver_id = "PNCCD_FMT-0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/CAL/{}:output' # template for data source name, will be filled with karabo_id and receiver_id.
# Parameters affecting data correction.
commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column
commonModeBlockSize = [512, 512] # size of the detector in pixels for common mode calculations
split_evt_primary_threshold = 4. # primary threshold for split event classification in terms of n sigma noise
split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise
saturated_threshold = 32000. # full well capacity in ADU
# Conditions for retrieving calibration constants
fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.
fix_temperature_bot = 0. # fix temperature for bottom sensor in K, set to 0. to use value from slow data.
gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data.
bias_voltage = 0. # the detector's bias voltage. set to 0. to use value from slow data.
integration_time = 70 # detector's integration time
photon_energy = 1.6 # Al fluorescence in keV
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
remove_bias_voltage_if_zero = True # This flag enables removing bias voltage from the conditions if a 0 value is read from RAW data. This is useful when the corresponding constants for old RAW had no bias voltage because of a mistake in control data. e.g. p002857
# Booleans for selecting corrections to apply.
only_offset = False # Only, apply offset.
common_mode = True # Apply common mode correction
relgain = True # Apply relative gain correction
pattern_classification = True # classify split events
# parameters affecting stored output data.
chunk_size_idim = 1 # H5 chunking size of output data
limit_trains = 0 # this parameter is used for limiting number of images to correct from a sequence file.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
corr_bools["only_offset"] = only_offset
# Apply offset only.
if not only_offset:
corr_bools["relgain"] = relgain
corr_bools["common_mode"] = common_mode
corr_bools["pattern_class"] = pattern_classification
```
%% Cell type:code id: tags:
``` python
import os
import sys
import warnings
from logging import warning
from pathlib import Path
warnings.filterwarnings('ignore')
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
from IPython.display import Markdown, display
from extra_data import H5File, RunDirectory
from prettytable import PrettyTable
%matplotlib inline
import cal_tools.restful_config as rest_cfg
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import pnccdlib
from cal_tools.files import DataFile
from cal_tools.calcat_interface import CalCatError, PNCCD_CalibrationData
from cal_tools.tools import (
calcat_creation_time,
write_constants_fragment,
)
from cal_tools.step_timing import StepTimer
```
%% Cell type:code id: tags:
``` python
# Calibration Database Settings, and Some Initial Run Parameters & Paths:
display(Markdown('### Initial Settings and Paths'))
# Sensor size and block size definitions (important for common mode and other corrections):
pixels_x = 1024 # rows of pnCCD in pixels
pixels_y = 1024 # columns of pnCCD in pixels
in_folder = Path(in_folder)
sensorSize = [pixels_x, pixels_y]
# For xcal.HistogramCalculators.
blockSize = [pixels_x//2, pixels_y//2] # sensor area will be analysed according to blockSize.
print(f"pnCCD size is: {pixels_x}x{pixels_y} pixels.")
print(f'Calibration database interface selected: {cal_db_interface}')
# Paths to the data:
instrument_src = instrument_source_template.format(karabo_id, receiver_id)
print(f"Instrument H5File source: {instrument_src}\n")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}", _use_voview=False)
# Output Folder Creation:
os.makedirs(out_folder, exist_ok=True)
# extract control data
step_timer.start()
ctrl_data = pnccdlib.PnccdCtrl(run_dc, karabo_id)
if bias_voltage == 0.:
bias_voltage = ctrl_data.get_bias_voltage()
if gain == -1:
gain = ctrl_data.get_gain()
if fix_temperature_top == 0:
fix_temperature_top = ctrl_data.get_fix_temperature_top()
if fix_temperature_bot == 0:
fix_temperature_bot = ctrl_data.get_fix_temperature_bot()
step_timer.done_step("Reading control parameters.")
# Printing the Parameters Read from the Data File:
display(Markdown('### Detector Parameters'))
print(f"Bias voltage is {bias_voltage:0.1f} V.")
print(f"Detector gain is set to 1/{int(gain)}.")
print(f"Detector integration time is set to {integration_time} ms")
print(f"Top pnCCD sensor is at temperature of {fix_temperature_top:0.2f} K")
print(f"Bottom pnCCD sensor is at temperature of {fix_temperature_bot:0.2f} K")
```
%% Cell type:code id: tags:
``` python
seq_files = []
for f in run_dc.select(instrument_src).files:
fpath = Path(f.filename)
if fpath.match(f"*{karabo_da}*.h5"):
seq_files.append(fpath)
if sequences != [-1]:
seq_files = sorted([f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)])
print(f"Processing a total of {len(seq_files)} sequence files:")
print(*seq_files, sep='\n')
```
%% Cell type:code id: tags:
``` python
gain_k = [k for k, v in pnccdlib.VALID_GAINS.items() if v == gain][0]
if gain_k == 'a':
split_evt_mip_threshold = 1000. # MIP threshold in ADU for event classification (10 times average noise)
# Each xcal.HistogramCalculator requires a total number of bins and a binning range. We define these
# using a dictionary:
# For all xcal histograms:
Hist_Bin_Dict = {
"bins": 35000, # number of bins
"bin_range": [0, 35000]
}
# For the numpy histograms on the last cell of the notebook:
Event_Bin_Dict = {
"event_bins": 1000, # number of bins
"b_range": [0, 35000] # bin range
}
elif gain_k == 'b':
split_evt_mip_threshold = 270. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 10000,
"bin_range": [0, 10000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 10000]
}
elif gain_k == 'c':
split_evt_mip_threshold = 110. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 3000,
"bin_range": [0, 3000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 3000]
}
elif gain_k == 'd':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 1000,
"bin_range": [0, 1000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 1000]
}
elif gain_k == 'e':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 500,
"bin_range": [0, 500]
}
Event_Bin_Dict = {
"event_bins": 500,
"b_range": [0, 500]
}
else:
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 220,
"bin_range": [0, 220]
}
Event_Bin_Dict = {
"event_bins": 220,
"b_range": [0, 220]
}
bins = Hist_Bin_Dict["bins"]
bin_range = Hist_Bin_Dict["bin_range"]
event_bins = Event_Bin_Dict["event_bins"]
b_range = Event_Bin_Dict["b_range"]
```
%% Cell type:markdown id: tags:
As a first step, calibration constants have to be retrieved from the calibration database
%% Cell type:code id: tags:
``` python
display(Markdown("### Constants retrieval"))
step_timer.start()
constant_names = ["OffsetCCD", "NoiseCCD", "BadPixelsDarkCCD"]
if relgain:
constant_names += ["RelativeGainCCD"]
# In the case of an older proposal (e.g., proposal 002857),
# it is possible that the bias voltage was 0
# resulting in the absence of bias voltage values in
# the previously injected dark constants. This situation can be
# attributed to a feature that is currently not available in iCalibrationDB.
if bias_voltage == 0 and remove_bias_voltage_if_zero:
bias_voltage = None
pnccd_cal = PNCCD_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
integration_time=integration_time,
sensor_temperature=fix_temperature_top,
gain_setting=gain,
event_at=creation_time,
source_energy=photon_energy,
client=rest_cfg.calibration_client(),
)
try:
pnccd_metadata = pnccd_cal.metadata(calibrations=constant_names)
except CalCatError as e:
warning(e)
pnccd_metadata = pnccd_cal.metadata(calibrations=pnccd_cal.dark_calibrations)
if relgain:
try:
gain_metadata = pnccd_cal.metadata(calibrations=["RelativeGainCCD"])
for mod, md in gain_metadata.items():
pnccd_metadata[mod].update(md)
except CalCatError as e: # TODO: fix after getting new exceptions.
warning(f"{e} While asking for {pnccd_cal.illuminated_calibrations}")
warning("RelativeGainCCD is not retrieved from the calibration database. "
"Relative gain correction is disabled.")
corr_bools['relgain'] = False
mod_to_pdu = pnccd_cal.mod_to_pdu
ccvs_url = "https://in.xfel.eu/calibration/calibration_constant_versions/"
for mod, mod_md in pnccd_metadata.items():
display(Markdown(f"{mod}({mod_to_pdu[mod]}):"))
for cname, c_mdata in mod_md.items():
display(Markdown(
f"- [{cname}]({ccvs_url}/{c_mdata['ccv_id']}): {c_mdata['begin_validity_at']}"))
metadata = pnccd_metadata[karabo_da]
# Validate the constants availability and raise/warn correspondingly.
missing_dark_constants = set(
c for c in pnccd_cal.dark_calibrations if c not in metadata.keys())
if missing_dark_constants:
raise KeyError(
f"Dark constants {missing_dark_constants} are not available for correction.")
if corr_bools.get('relgain') and "RelativeGainCCD" not in metadata.keys():
warning("RelativeGainCCD is not found in the calibration database.")
corr_bools['relgain'] = False
step_timer.done_step("Constants retrieval")
```
%% Cell type:code id: tags:
``` python
# Record constant details in YAML metadata
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=pnccd_metadata,
caldb_root=pnccd_cal.caldb_root,
)
# load constants arrays after storing fragment YAML file
# and validating constants availability.
constants = pnccd_cal.ndarray_map(metadata=pnccd_metadata).get(karabo_da, {})
step_timer.done_step("Constants retrieval")
```
%% Cell type:code id: tags:
``` python
fig = xana.heatmapPlot(constants["OffsetCCD"][:,:,0], x_label='Columns', y_label='Rows', lut_label='Offset (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmax=16000,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Offset Map')
fig = xana.heatmapPlot(constants["NoiseCCD"][:,:,0], x_label='Columns', y_label='Rows',
lut_label='Corrected Noise (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Noise Map')
fig = xana.heatmapPlot(np.log2(constants["BadPixelsDarkCCD"][:,:,0]), x_label='Columns', y_label='Rows',
lut_label='Bad Pixel Value (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Bad Pixels Map')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(constants["RelativeGainCCD"], figsize=(8, 8), x_label='Columns', y_label='Rows',
lut_label='Relative Gain',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0.8, vmax=1.2,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
panel_top_low_lim = 0.5, panel_top_high_lim = 1.5, panel_side_low_lim = 0.5,
panel_side_high_lim = 1.5,
title = f'Relative Gain Map for pnCCD (Gain = 1/{int(gain)})')
```
%% Cell type:code id: tags:
``` python
#************************ Calculators ************************#
if corr_bools.get('common_mode'):
# Common Mode Correction Calculator:
cmCorrection = xcal.CommonModeCorrection([pixels_x, pixels_y],
commonModeBlockSize,
commonModeAxis,
parallel=False, dType=np.float32, stride=1,
noiseMap=constants["NoiseCCD"].astype(np.float32), minFrac=0.25)
if corr_bools.get('pattern_class'):
# Pattern Classifier Calculator:
# Left Hemisphere:
patternClassifierLH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["NoiseCCD"][:, :pixels_y//2],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=3, # track along y-axis, left to right (see
nCells=1, # split_event.py file in pydetlib/lib/src/
allowElongated=False, # XFELDetAna/algorithms)
blockSize=[pixels_x, pixels_y//2],
parallel=False)
# Right Hemisphere:
patternClassifierRH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["NoiseCCD"][:, pixels_y//2:],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=4, # track along y-axis, right to left
nCells=1,
allowElongated=False,
blockSize=[pixels_x, pixels_y//2],
parallel=False)
patternClassifierLH._imagesPerChunk = 1
patternClassifierRH._imagesPerChunk = 1
patternClassifierLH._noisemap = constants["NoiseCCD"][:, :pixels_x//2]
patternClassifierRH._noisemap = constants["NoiseCCD"][:, pixels_x//2:]
# Setting bad pixels:
patternClassifierLH.setBadPixelMask(constants["BadPixelsDarkCCD"][:, :pixels_x//2] != 0)
patternClassifierRH.setBadPixelMask(constants["BadPixelsDarkCCD"][:, pixels_x//2:] != 0)
```
%% Cell type:code id: tags:
``` python
#***************** Histogram Calculators ******************#
# Will contain uncorrected data:
histCalRaw = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain offset corrected data:
histCalOffsetCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('common_mode'):
# Will contain common mode corrected data:
histCalCommonModeCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('pattern_class'):
# Will contain split events pattern data:
histCalPcorr = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain singles events data:
histCalPcorrS = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('relgain'):
# Will contain gain corrected data:
histCalGainCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
```
%% Cell type:markdown id: tags:
## Applying corrections to the raw data
%% Cell type:code id: tags:
``` python
def offset_correction(wid, index, d):
"""offset correction.
Equating bad pixels' values to np.nan,
so that the pattern classifier ignores them:
"""
d = d.copy()
# TODO: To clear this up. Is it on purpose to save corrected data with nans?
d[bpix != 0] = np.nan
d -= offset # offset correction
# TODO: to clear this up. why save the badpixels map in the corrected data?
bpix_data[index, ...] = bpix
data[index, ...] = d
def common_mode(wid, index, d):
"""common-mode correction.
Discarding events caused by saturated pixels:
"""
d = np.squeeze(cmCorrection.correct(d, cellTable=np.zeros(pixels_y, np.int32)))
# we equate these values to np.nan so that the pattern classifier ignores them:
d[d >= saturated_threshold] = np.nan
data[index, ...] = d
def gain_correction(wid, index, d):
"""relative gain correction."""
d /= relativegain
data[index, ...] = d
def pattern_classification_correction(wid, index, d):
"""pattern classification correction.
Saves split-event corrected images and the calculated pattern (cluster) map.
The data are divided into left and right hemispheres:
"""
# pattern classification on corrected data
dataLH, patternsLH = patternClassifierLH.classify(d[:, :pixels_x//2])
dataRH, patternsRH = patternClassifierRH.classify(d[:, pixels_x//2:])
d[:, :pixels_x//2] = np.squeeze(dataLH)
d[:, pixels_x//2:] = np.squeeze(dataRH)
patterns = np.zeros(d.shape, patternsLH.dtype)
patterns[:, :pixels_x//2] = np.squeeze(patternsLH)
patterns[:, pixels_x//2:] = np.squeeze(patternsRH)
d[d < split_evt_primary_threshold*noise] = 0
data[index, ...] = d
ptrn_data[index, ...] = patterns
d[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles
filtered_data[index, ...] = d
```
%% Cell type:code id: tags:
``` python
# 10 is a number chosen after testing 1 ... 71 parallel threads for a node with 72 cpus.
parallel_num_threads = 10
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
data_path = "INSTRUMENT/"+instrument_src+"/data/"
offset = np.squeeze(constants["OffsetCCD"])
noise = np.squeeze(constants["NoiseCCD"])
bpix = np.squeeze(constants["BadPixelsDarkCCD"])
relativegain = constants.get("RelativeGainCCD")
```
%% Cell type:code id: tags:
``` python
def write_datasets(seq_dc, corr_arrays, out_file, instrument_src):
"""
Creating datasets first then adding data.
To have metadata together available at the start of the file,
so it's quick to see what the file contains.
"""
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src, "data.image"].data_counts(labelled=False)
dataset_chunk = ((chunk_size_idim,) + corr_arrays["pixels"].shape[1:]) # e.g. (1, pixels_x, pixels_y)
with DataFile(out_file, 'w') as ofile:
# Create INDEX datasets.
ofile.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create METADATA datasets
ofile.create_metadata(
like=seq_dc,
sequence=seq_dc.run_metadata()["sequenceNumber"],
instrument_channels=(f"{instrument_src}/data",)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected trainId in the corrected file.
outp_source.create_key(
f"data.trainId", data=seq_dc.train_ids,
chunks=min(50, len(seq_dc.train_ids))
)
# TODO: gain dataset is just the RelativeGain constant
# and it doesn't make sense to write it into corrected data.
comp_fields = ["gain", "patterns", "pixels_classified"]
# TODO: to clear this up: why save corrected data
# in data/pixels rather than data/image.
for field, data in corr_arrays.items():
if field in comp_fields: # Write compressed corrected data.
outp_source.create_compressed_key(f"data.{field}", data=data)
else:
outp_source.create_key(
f"data.{field}", data=data,
chunks=dataset_chunk
)
```
%% Cell type:code id: tags:
``` python
# Data corrections and event classifications happen here.
# Also, the corrected data are written to datasets:
empty_seq = 0
for seq_n, seq_f in enumerate(seq_files):
seq_dc = H5File(seq_f)
out_file = f"{out_folder}/{seq_f.name}".replace("RAW", "CORR")
step_timer.start()
img_dc = seq_dc[instrument_src, "data.image"]
dshape = seq_dc[instrument_src, "data.image"].shape
n_trains = dshape[0]
corr_ntrains = dshape[0] # number of available trains to correct.
all_train_ids = img_dc.train_ids # All trains including trains with no data.
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {seq_f.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(
f"{seq_f.name} has {len(all_train_ids) - corr_ntrains} "
"trains with missing data."
)
# If you want to analyze only a certain number of frames
# instead of all available good frames.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
data_shape = (corr_ntrains, dshape[1], dshape[2])
print(f"Correcting file {seq_f} of {corr_ntrains} trains.")
# Overwrite seq_dc after eliminating empty trains or/and applying limited images.
seq_dc = seq_dc.select(
instrument_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
raw_data = seq_dc[instrument_src, "data.image"].ndarray().astype(np.float32)
to_store_arrays = {"image": raw_data}
# TODO: move the parts for reading data to plot to later cells.
if seq_n == 0:
raw_plt = raw_data.copy() # plot first sequence only
step_timer.start()
# Allocating shared arrays for data arrays for each correction stage.
data = context.alloc(shape=data_shape, dtype=np.float32)
bpix_data = context.alloc(shape=data_shape, dtype=np.uint32)
histCalRaw.fill(raw_data) # filling histogram with raw uncorrected data
# Applying offset correction
context.map(offset_correction, raw_data)
histCalOffsetCor.fill(data) # filling histogram with offset corrected data
if seq_n == 0:
off_data = data.copy() # plot first sequence only
to_store_arrays["pixels"] = data.copy()
to_store_arrays["mask"] = bpix_data
step_timer.done_step(f'offset correction.')
if corr_bools.get('common_mode'):
step_timer.start()
# Applying common mode correction
context.map(common_mode, data)
if seq_n == 0:
cm_data = data.copy() # plot first sequence only
to_store_arrays["pixels_cm"] = data.copy()
histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data
step_timer.done_step(f'common-mode correction.')
if corr_bools.get('relgain'):
step_timer.start()
# Applying gain correction
context.map(gain_correction, data)
if seq_n == 0:
rg_data = data.copy() # plot first sequence only
# TODO: Why storing a repeated constant for each image in corrected files.
to_store_arrays["gain"] = np.repeat(relativegain[np.newaxis, ...], corr_ntrains, axis=0).astype(np.float32) # noqa
histCalGainCor.fill(data) # filling histogram with gain corrected data
step_timer.done_step(f'gain correction.')
if corr_bools.get('pattern_class'):
step_timer.start()
ptrn_data = context.alloc(shape=data_shape, dtype=np.int32)
filtered_data = context.alloc(shape=data_shape, dtype=np.int32)
# Applying pattern classification correction
# Even though data is indeed of dtype np.float32,
# not specifying this again screws with the data quality.
context.map(pattern_classification_correction, data.astype(np.float32))
if seq_n == 0:
cls_data = data.copy() # plot first sequence only
# split event corrected images plotted for first sequence only
# (also these events are only singles events):
to_store_arrays["pixels_classified"] = data.copy()
to_store_arrays["patterns"] = ptrn_data
histCalPcorr.fill(data) # filling histogram with split events corrected data
# filling histogram with corr data after discarding doubles, triples, quadruple, clusters, and first singles
histCalPcorrS.fill(filtered_data)
step_timer.done_step(f'pattern classification correction.')
step_timer.start()
# Storing corrected data sources.
write_datasets(
seq_dc=seq_dc,
corr_arrays=to_store_arrays,
out_file=out_file,
instrument_src=instrument_src,
)
step_timer.done_step(f'Storing data.')
# Exit and raise warning if there are no data to correct for all sequences.
if empty_seq == len(seq_files):
warning("No valid trains for RAW data to correct.")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
print("In addition to offset correction, the following corrections were performed:")
for k, v in corr_bools.items():
if v:
print(" -", k.upper())
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# Histograming the resulting spectra:
# First _ refers to the bin edges and second _ refers to statistics and we ignore both.
# if you use histCalRaw.get(cumulatative = True) and so on, the cumulatative = True turns the counts array such as
# RawHistVals and so on into a 1D array instead of keeping the original shape:
RawHistVals, _, RawHistMids, _ = histCalRaw.get()
off_cor_HistVals, _, off_cor_HistMids, _ = histCalOffsetCor.get()
if corr_bools.get('common_mode'):
cm_cor_HistVals, _, cm_HistMids, _ = histCalCommonModeCor.get()
if corr_bools.get('relgain'):
gain_cor_HistVals, _, gain_cor_HistMids, _ = histCalGainCor.get()
if corr_bools.get('pattern_class'):
split_HistVals, _, split_HistMids, _ = histCalPcorr.get() # split events corrected
singles_HistVals, _, singles_HistMids, _ = histCalPcorrS.get() # last s in variable names: singles events
```
%% Cell type:code id: tags:
``` python
# Saving intermediate data to disk:
step_timer.start()
np.savez(os.path.join(out_folder, 'Raw_Events.npz'), RawHistMids, RawHistVals)
np.savez(os.path.join(out_folder, 'Offset_Corrected_Events.npz'), off_cor_HistMids, off_cor_HistVals)
if corr_bools.get('common_mode'):
np.savez(os.path.join(out_folder, 'Common_Mode_Corrected_Events.npz'), cm_HistMids, cm_cor_HistVals)
if corr_bools.get('relgain'):
np.savez(os.path.join(out_folder, 'Gain_Corrected_Events.npz'), gain_cor_HistMids, gain_cor_HistVals)
if corr_bools.get('pattern_class'):
np.savez(os.path.join(out_folder, 'Split_Events_Corrected_Events.npz'), split_HistMids, split_HistVals)
np.savez(os.path.join(out_folder, 'Singles_Events.npz'), singles_HistMids, singles_HistVals)
step_timer.done_step(f'Saving intermediate data to disk.')
print("Various spectra are saved to disk in the form of histograms. Please check {}".format(out_folder))
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw vs. Corrected Spectra'))
step_timer.start()
figure = [{'x': RawHistMids,
'y': RawHistVals,
'y_err': np.sqrt(RawHistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Uncorrected'
},
{'x': off_cor_HistMids,
'y': off_cor_HistVals,
'y_err': np.sqrt(off_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset Corrected'
}]
if corr_bools.get('common_mode'):
figure.append({'x': cm_HistMids,
'y': cm_cor_HistVals,
'y_err': np.sqrt(cm_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Common Mode Corrected'})
if corr_bools.get('relgain'):
xrange = bin_range
figure.append({'x': gain_cor_HistMids,
'y': gain_cor_HistVals,
'y_err': np.sqrt(gain_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Gain Corrected'})
if corr_bools.get('pattern_class'):
figure.extend([{'x': split_HistMids,
'y': split_HistVals,
'y_err': np.sqrt(split_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Split Events Corrected'
},
{'x': singles_HistMids,
'y': singles_HistVals,
'y_err': np.sqrt(singles_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Singles Events'
}])
fig = xana.simplePlot(figure, aspect=1, x_label='ADU', y_label='Number of Occurrences', figsize='2col',
y_log=True, x_range=bin_range, title = '1 ADU per bin is used.',
legend='top-right-frame-1col')
step_timer.done_step('Plotting')
```
%% Cell type:code id: tags:
``` python
# This function plots pattern statistics:
def classification_plot(patternStats, hemisphere):
print("****************** {} HEMISPHERE ******************\n"
.format(hemisphere))
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(4, 4, 1)
sfields = ["singles", "first singles", "clusters"]
mfields = ["doubles", "triples", "quads"]
relativeOccurances = []
labels = []
for i, f in enumerate(sfields):
relativeOccurances.append(patternStats[f])
labels.append(f)
for i, f in enumerate(mfields):
for k in range(len(patternStats[f])):
relativeOccurances.append(patternStats[f][k])
labels.append("{}({})".format(f, k))
relativeOccurances = np.array(relativeOccurances, float)
relativeOccurances /= np.sum(relativeOccurances)
pie = ax.pie(relativeOccurances, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
smaps = ["singlemap", "firstsinglemap", "clustermap"]
for i, m in enumerate(smaps):
ax = fig.add_subplot(4, 4, 2+i)
pmap = ax.imshow(patternStats[m], interpolation="nearest", vmax=2*np.nanmedian(patternStats[m]))
ax.set_title(m)
cb = fig.colorbar(pmap)
mmaps = ["doublemap", "triplemap", "quadmap"]
k = 0
for i, m in enumerate(mmaps):
for j in range(4):
ax = fig.add_subplot(4, 4, 2+len(smaps)+k)
pmap = ax.imshow(patternStats[m][j], interpolation="nearest", vmax=2*np.median(patternStats[m][j]))
ax.set_title("{}({})".format(m,j))
cb = fig.colorbar(pmap)
k+=1
```
%% Cell type:code id: tags:
``` python
# The next two cells plot the classification results for left and right hemispheres, respectively:
display(Markdown('### Classification Results - Plots'))
if corr_bools.get('pattern_class'):
patternStatsLH = patternClassifierLH.getPatternStats()
classification_plot(patternStatsLH, 'Left')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
patternStatsRH = patternClassifierRH.getPatternStats()
classification_plot(patternStatsRH, 'Right')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Tabulated Statistics'))
if corr_bools.get('pattern_class'):
step_timer.start()
t0 = PrettyTable()
t0.title = "Total Number of Counts after All Corrections"
t0.field_names = ["Hemisphere", "Singles", "First-Singles", "Clusters"]
t0.add_row(["LH", patternStatsLH['singles'], patternStatsLH['first singles'], patternStatsLH['clusters']])
t0.add_row(["RH", patternStatsRH['singles'], patternStatsRH['first singles'], patternStatsRH['clusters']])
print(t0)
print("Abbreviations: D (Doubles), T (Triples), Q (Quadruples), L (Left), R (Right), and H (Hemisphere).")
t1 = PrettyTable()
t1.field_names = ["Index", "D-LH", "D-RH", "T-LH", "T-RH", "Q-LH", "Q-RH"]
t1.add_row([0, patternStatsLH['doubles'][0], patternStatsRH['doubles'][0], patternStatsLH['triples'][0],
patternStatsRH['triples'][0], patternStatsLH['quads'][0], patternStatsRH['quads'][0]])
t1.add_row([1, patternStatsLH['doubles'][1], patternStatsRH['doubles'][1], patternStatsLH['triples'][1],
patternStatsRH['triples'][1], patternStatsLH['quads'][1], patternStatsRH['quads'][1]])
t1.add_row([2, patternStatsLH['doubles'][2], patternStatsRH['doubles'][2], patternStatsLH['triples'][2],
patternStatsRH['triples'][2], patternStatsLH['quads'][2], patternStatsRH['quads'][2]])
t1.add_row([3, patternStatsLH['doubles'][3], patternStatsRH['doubles'][3], patternStatsLH['triples'][3],
patternStatsRH['triples'][3], patternStatsLH['quads'][3], patternStatsRH['quads'][3]])
print(t1)
step_timer.done_step('Classification Results - Tabulated Statistics')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
doublesLH = patternStatsLH['doubles'][0] + patternStatsLH['doubles'][1] + patternStatsLH['doubles'][2] + \
patternStatsLH['doubles'][3]
triplesLH = patternStatsLH['triples'][0] + patternStatsLH['triples'][1] + patternStatsLH['triples'][2] + \
patternStatsLH['triples'][3]
quadsLH = patternStatsLH['quads'][0] + patternStatsLH['quads'][1] + patternStatsLH['quads'][2] + \
patternStatsLH['quads'][3]
allsinglesLH = patternStatsLH['singles'] + patternStatsLH['first singles']
eventsLH = allsinglesLH + doublesLH + triplesLH + quadsLH
doublesRH = patternStatsRH['doubles'][0] + patternStatsRH['doubles'][1] + patternStatsRH['doubles'][2] + \
patternStatsRH['doubles'][3]
triplesRH = patternStatsRH['triples'][0] + patternStatsRH['triples'][1] + patternStatsRH['triples'][2] + \
patternStatsRH['triples'][3]
quadsRH = patternStatsRH['quads'][0] + patternStatsRH['quads'][1] + patternStatsRH['quads'][2] + \
patternStatsRH['quads'][3]
allsinglesRH = patternStatsRH['singles'] + patternStatsRH['first singles']
eventsRH = allsinglesRH + doublesRH + triplesRH + quadsRH
if eventsLH > 0.:
reloccurLH = np.array([allsinglesLH/eventsLH, doublesLH/eventsLH, triplesLH/eventsLH, quadsLH/eventsLH])
else:
reloccurLH = np.array([0]*4)
if eventsRH > 0.:
reloccurRH = np.array([allsinglesRH/eventsRH, doublesRH/eventsRH, triplesRH/eventsRH, quadsRH/eventsRH])
else:
reloccurRH = np.array([0]*4)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Pie Charts'))
if corr_bools.get('pattern_class'):
step_timer.start()
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot(1, 2, 1)
labels = ['Singles', 'Doubles', 'Triples', 'Quads']
pie = ax.pie(reloccurLH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in LH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
ax = fig.add_subplot(1, 2, 2)
pie = ax.pie(reloccurRH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in RH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
step_timer.done_step('Classification Results - Pie Charts')
```
%% Cell type:markdown id: tags:
### Various Images Averaged Over All Frames of Only the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
uncor_mean_im = np.nanmean(raw_data, axis=0)
offset_mean_im = np.nanmean(off_data, axis=0)
if corr_bools.get('common_mode'):
cm_mean_im = np.nanmean(cm_data, axis=0)
if corr_bools.get('relgain'):
gain_mean_im = np.nanmean(rg_data, axis=0)
if corr_bools.get('pattern_class'):
mean_im_cc = np.nanmean(cls_data, axis=0)
fig = xana.heatmapPlot(uncor_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image Averaged over Frames in the First Sequence')
fig = xana.heatmapPlot(offset_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(gain_mean_im, x_label='Columns', y_label='Rows',
lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(mean_im_cc, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0, vmax= 18000,
title = 'Image of Single Events Averaged over Frames in the First Sequence')
step_timer.done_step("Plotting")
```
%% Cell type:markdown id: tags:
### Images of the First Frame of the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(raw_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image (First Frame of the First Sequence)')
fig = xana.heatmapPlot(off_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(rg_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(cls_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Image of Single Events (First Frame of the First Sequence)')
step_timer.done_step("Plotting")
```
%% Cell type:code id: tags:
``` python
# Resetting the histogram calculators:
histCalRaw.reset()
histCalOffsetCor.reset()
if corr_bools.get('common_mode'):
histCalCommonModeCor.reset()
if corr_bools.get('relgain'):
histCalGainCor.reset()
if corr_bools.get('pattern_class'):
histCalPcorr.reset()
histCalPcorrS.reset()
```
%% Cell type:markdown id: tags:
Next, the corrected event patterns are read from the patterns dataset created previously and separated into four categories (singles, doubles, triples and quadruples) using the pattern indices. This is done only for one sequence, corresponding to the seq_num variable, as an example.
Note that the number of bins and the bin range for the following histograms may differ from those used above (depending on gain) to make the counts more noticeable and the peaks better defined.
If you are interested in plotting the events from all sequences or the spectra of half of the sensor, execute the spectra_pnCCD_NBC.ipynb notebook.
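As a minimal illustration (plain NumPy, with a hypothetical helper name that is not part of this notebook), the index ranges above can be turned into per-category counts:
``` python
import numpy as np

# Pattern index ranges documented above (upper bounds exclusive).
PATTERN_RANGES = {
    "singles": (100, 102),   # 100 = singles, 101 = first singles
    "doubles": (200, 204),
    "triples": (300, 304),
    "quads": (400, 404),
}

def count_patterns(patterns: np.ndarray) -> dict:
    """Count classified pixels per category from a 'data.patterns' array."""
    return {
        name: int(np.isin(patterns, np.arange(lo, hi)).sum())
        for name, (lo, hi) in PATTERN_RANGES.items()
    }
```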
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
singles = []
doubles = []
triples = []
quads = []
with H5File(f"{out_folder}/{seq_files[0].name.replace('RAW', 'CORR')}") as dc: # noqa
data = dc[instrument_src, "data.pixels_classified"].ndarray()
patterns = dc[instrument_src, "data.patterns"].ndarray()
# events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),
# 300 - 303 (triples), and 400 - 403 (quadruples). Note that for the last three types of patterns,
# there are left, right, up, and down indices.
# Separating the events:
# Singles and First Singles:
for s in range(100, 102):
single = data.copy()
single[patterns != s] = np.nan
singles.append(single)
for d in range(200, 204):
double = data.copy()
double[patterns != d] = np.nan
doubles.append(double)
for t in range(300, 304):
triple = data.copy()
triple[patterns != t] = np.nan
triples.append(triple)
for q in range(400, 404):
quad = data.copy()
quad[patterns != q] = np.nan
quads.append(quad)
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
step_timer.start()
hA = 0
h = 0
for single in singles:
hs, e = np.histogram(single.flatten(), bins=event_bins, range=b_range) # h: histogram counts, e: bin edges
h += hs
hA += hs # hA: counts all events (see below)
# The bin edges array has one more element than the counts array, so we plot against e[:-1] to match the size of h.
display(Markdown('### Histograms of Corrected Events for One Sequence Only'))
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111)
ax.step(e[:-1], h, color='blue', label='Events Involving Single Pixels Only')
ax.semilogy() # y-axis is log, x-axis is linear
ax.set_xlabel("Energy (ADU) [{} bins per {} ADU]".format(event_bins, b_range[1]-b_range[0]))
ax.set_ylabel("Corrected Events for One Sequence (counts)")
ax.set_xlim(b_range)
h = 0
for double in doubles:
hd, e = np.histogram(double.flatten(), bins=event_bins, range=b_range)
h += hd
hA += hd
ax.step(e[:-1], h, color='red', label='Events Splitting on Double Pixels')
h = 0
for triple in triples:
ht, e = np.histogram(triple.flatten(), bins=event_bins, range=b_range)
h += ht
hA += ht
ax.step(e[:-1], h, color='green', label='Events Splitting on Triple Pixels')
h = 0
for quad in quads:
hq, e = np.histogram(quad.flatten(), bins=event_bins, range=b_range)
h += hq
hA += hq
ax.step(e[:-1], h, color='purple', label='Events Splitting on Quadruple Pixels')
ax.step(e[:-1], hA, color='grey', label='All Valid Events')
l = ax.legend()
step_timer.done_step("Plotting")
```
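For reference, the e[:-1] convention in the cell above follows from np.histogram returning one more bin edge than counts; a quick standalone check (not part of the pipeline):
``` python
import numpy as np

counts, edges = np.histogram(np.random.default_rng(0).normal(size=1000), bins=50)
assert len(edges) == len(counts) + 1  # hence stepping against edges[:-1]
```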
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
@@ -108,7 +108,7 @@ install_requires = [
if "readthedocs.org" not in sys.executable:
install_requires += [
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.4.0", # noqa
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.4.1", # noqa
"XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.7.0", # noqa
"CalParrot @ git+ssh://git@git.xfel.eu:10022/calibration/calparrot.git@0.1", # noqa
]
@@ -122,7 +122,7 @@ class CalCatApi(metaclass=ClientWrapper):
return {
"parameters_conditions_attributes": [
{"parameter_id": self.parameter_id(k), "value": str(v)}
{"parameter_name": k, "value": str(v)}
for k, v in condition.items()
]
}
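For illustration (values invented for this example), a condition mapping such as {"Sensor Bias Voltage": 300} would serialize under the new parameter_name form as:
``` python
{
    "parameters_conditions_attributes": [
        {"parameter_name": "Sensor Bias Voltage", "value": "300"},
    ]
}
```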
@@ -1001,8 +1001,18 @@ class LPD_CalibrationData(SplitConditionCalibrationData):
"Pixels X",
"Pixels Y",
"Feedback capacitor",
"Memory cell order",
]
illuminated_parameters = [
"Sensor Bias Voltage",
"Memory cells",
"Pixels X",
"Pixels Y",
"Feedback capacitor",
"Source Energy",
"category"
]
illuminated_parameters = dark_parameters + ["Source Energy", "category"]
def __init__(
self,
@@ -1013,6 +1023,7 @@ class LPD_CalibrationData(SplitConditionCalibrationData):
pixels_x=256,
pixels_y=256,
source_energy=9.2,
memory_cell_order=None,
category=1,
modules=None,
client=None,
@@ -1034,6 +1045,7 @@ class LPD_CalibrationData(SplitConditionCalibrationData):
self.pixels_x = pixels_x
self.pixels_y = pixels_y
self.feedback_capacitor = feedback_capacitor
self.memory_cell_order = memory_cell_order
self.source_energy = source_energy
self.category = category
@@ -1110,6 +1122,7 @@ class JUNGFRAU_CalibrationData(CalibrationData):
"Integration Time",
"Sensor temperature",
"Gain Setting",
"Gain mode",
]
def __init__(
@@ -772,21 +772,38 @@ class LpdCorrections:
flat_fields)
def get_mem_cell_order(run, sources) -> str:
"""Load the memory cell order to use as a condition to find constants"""
res = set()
def get_mem_cell_pattern(run, sources) -> np.ndarray:
"""Load the memory cell order to use as a condition to find constants
This looks at the first train for each source, issuing a warning if the
pattern differs between sources.
"""
patterns = []
for source in sources:
cell_id_data = run[source, 'image.cellId'].drop_empty_trains()
if len(cell_id_data.train_ids) == 0:
continue # No data for this module
cell_ids = cell_id_data[0].ndarray()
# Trailing comma required so e.g. "...,1" doesn't match "...,10"
res.add(",".join([str(c) for c in cell_ids.flatten()]) + ",")
cell_ids = cell_id_data[0].ndarray().flatten()
if not any(np.array_equal(cell_ids, p) for p in patterns):
patterns.append(cell_ids)
if len(res) > 1:
if len(patterns) > 1:
warn("Memory cell order varies between detector modules: "
"; ".join([f"{s[:10]}...{s[-10:]}" for s in res]))
elif not res:
"; ".join([f"{s[:10]}...{s[-10:]}" for s in patterns]))
elif not patterns:
raise ValueError("Couldn't find memory cell order for any modules")
return res.pop()
return patterns[0]
def make_cell_order_condition(use_param, cellid_pattern) -> Optional[str]:
"""Convert the cell ID array to a condition string, or None if not used"""
if use_param == 'auto':
# auto -> use cell order if it wraps around (cells not filled monotonically)
use = len(cellid_pattern) > 2 and (
np.diff(cellid_pattern.astype(np.int32)) < 0
).any()
else:
use = (use_param == 'always')
return (",".join([str(c) for c in cellid_pattern]) + ",") if use else None
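For illustration, assuming the behaviour of make_cell_order_condition above: a wrapping cell pattern produces a trailing-comma condition string under 'auto', while a monotonic pattern yields None unless 'always' is requested (a sketch, not a test from the repository):
``` python
import numpy as np

wrapping = np.array([250, 251, 252, 0, 1, 2])   # wraps around -> used under 'auto'
monotonic = np.array([0, 1, 2, 3])              # filled monotonically -> skipped under 'auto'

assert make_cell_order_condition('auto', wrapping) == "250,251,252,0,1,2,"
assert make_cell_order_condition('auto', monotonic) is None
assert make_cell_order_condition('always', monotonic) == "0,1,2,3,"
```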
@@ -447,7 +447,11 @@ def init_jungfrau_geom(
(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif "JF1M" in karabo_id:
nmods = 2
expected_modules = [f"JNGFR{i:02d}" for i in range(1, nmods+1)]
st_modno = 1
# TODO: This is a temporary workaround. A proper solution is needed.
if karabo_id == "SPB_CFEL_JF1M":
st_modno = 9
expected_modules = [f"JNGFR{i:02d}" for i in range(st_modno, st_modno+nmods)]
module_pos = ((-mod_width//2, 33), (-mod_width//2, -mod_height-33))
orientations = [(-1,-1), (1,1)]
else: # e.g. HED_IA1_JF500K1, FXE_XAD_JF500K, FXE_XAD_JFHZ
@@ -1030,3 +1030,17 @@ def write_compressed_frames(
dataset.id.write_direct_chunk(chunk_start, compressed)
return dataset
def reorder_axes(a, from_order, to_order):
"""Rearrange axes of array a from from_order to to_order
This does the same as np.transpose(), but making the before & after axes
more explicit. from_order is a sequence of strings labelling the axes of a,
and to_order is a similar sequence for the axes of the result.
"""
assert len(from_order) == a.ndim
assert sorted(from_order) == sorted(to_order)
from_order = list(from_order)
order = tuple([from_order.index(lbl) for lbl in to_order])
return a.transpose(order)
@@ -351,7 +351,9 @@ def prepare_job(
if title_cell is not None:
title_cell.source = ''
set_figure_format(new_nb, args["vector_figs"])
new_name = f"{nb_path.stem}__{cparm}__{suffix}.ipynb"
# In some cases a suffix for example can have `/`. e.g. LPDMini and GH2
# have karabo_da with a `/`.
new_name = f"{nb_path.stem}__{cparm}__{suffix}.ipynb".replace("/", "-")
nbformat.write(new_nb, cal_work_dir / new_name)
......
@@ -76,8 +76,6 @@ notebooks = {
"cluster cores": 8},
},
"CORRECT": {
"pre_notebooks": [
"notebooks/LPD/LPD_retrieve_constants_precorrection.ipynb"],
"notebook": "notebooks/LPD/LPD_Correct_Fast.ipynb",
"concurrency": {"parameter": "sequences",
"default concurrency": [-1],
@@ -110,6 +108,12 @@ notebooks = {
"use function": "balance_sequences",
"cluster cores": 16},
},
"INJECT_CONSTANTS": {
"notebook": "notebooks/LPDMini/LPD_Mini_Inject_calibration_constants_from_h5files.ipynb",
"concurrency": {"parameter": None,
"default concurrency": None,
"cluster cores": 1},
}
},
"PNCCD": {
"DARK": {
@@ -284,6 +288,17 @@ notebooks = {
},
},
},
"TIMEPIX": {
"CORRECT": {
"notebook": "notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb",
"concurrency": {
"parameter": None,
"use function": None,
"default concurrency": None,
"cluster cores": 1
},
},
},
"TEST": {
"TEST-CLI": {
"notebook": "notebooks/test/test-cli.ipynb",
@@ -22,6 +22,7 @@ from cal_tools.tools import (
recursive_update,
send_to_db,
write_constants_fragment,
reorder_axes,
)
# AGIPD operating conditions.
@@ -614,3 +615,13 @@ def test_write_constants_fragment(tmp_path: Path):
},
}
}
def test_reorder_axes():
a = np.zeros((10, 32, 256, 3))
from_order = ('cells', 'slow_scan', 'fast_scan', 'gain')
to_order = ('slow_scan', 'fast_scan', 'cells', 'gain')
assert reorder_axes(a, from_order, to_order).shape == (32, 256, 10, 3)
to_order = ('gain', 'fast_scan', 'slow_scan', 'cells')
assert reorder_axes(a, from_order, to_order).shape == (3, 256, 32, 10)
@@ -70,6 +70,7 @@ def test_main(capsys):
"--rel-gain", "true",
"--webservice-address", "inproc://socket",
"--correct",
"--verbose"
],
):
with patch("zmq.Context", return_value=context):
@@ -23,6 +23,11 @@ except ImportError:
log = logging.getLogger(__name__)
STATES_FINISHED = { # https://slurm.schedmd.com/squeue.html#lbAG
'BOOT_FAIL', 'CANCELLED', 'COMPLETED', 'DEADLINE', 'FAILED',
'OUT_OF_MEMORY', 'SPECIAL_EXIT', 'TIMEOUT',
}
class NoOpProducer:
"""Fills in for Kafka producer object when setting that up fails"""
@@ -50,10 +55,10 @@ def slurm_status(filter_user=True):
:return: a dictionary indexed by slurm jobid and containing a tuple
of (status, run time) as values.
"""
cmd = ["squeue"]
cmd = ["squeue", "--states=all"]
if filter_user:
cmd += ["--me"]
res = run(cmd, stdout=PIPE)
res = run(cmd, stdout=PIPE, stderr=PIPE)
if res.returncode == 0:
rlines = res.stdout.decode().split("\n")
statii = {}
@@ -65,6 +70,10 @@
except ValueError: # not enough values to unpack in split
pass
return statii
else:
log.error("Running squeue failed. stdout: %r, stderr: %r",
res.stdout.decode(), res.stderr.decode())
return None
def slurm_job_status(jobid):
@@ -148,15 +157,19 @@ class JobsMonitor:
Newly completed executions are present with an empty list.
"""
jobs_to_check = self.job_db.execute(
"SELECT job_id, exec_id FROM slurm_jobs WHERE finished = 0"
).fetchall()
if not jobs_to_check:
log.debug("No unfinished jobs to check")
return {}
statii = slurm_status()
# Check that slurm is giving proper feedback
if statii is None:
return {}
log.debug(f"SLURM info {statii}")
jobs_to_check = self.job_db.execute(
"SELECT job_id, exec_id FROM slurm_jobs WHERE finished = 0"
).fetchall()
ongoing_jobs_by_exn = {}
updates = []
for r in jobs_to_check:
@@ -166,13 +179,13 @@
if str(r['job_id']) in statii:
# statii contains jobs which are still going (from squeue)
slstatus, runtime = statii[str(r['job_id'])]
finished = False
execn_ongoing_jobs.append(f"{slstatus}-{runtime}")
else:
# These jobs have finished (successfully or otherwise)
_, runtime, slstatus = slurm_job_status(r['job_id'])
finished = True
finished = slstatus in STATES_FINISHED
updates.append((finished, runtime, slstatus, r['job_id']))