%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
overwrite = False # IGNORED, NEEDED FOR COMPATIBILITY.
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 225 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the index data
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
mem_cells = -1 # Number of memory cells used, set to -1 to automatically infer
bias_voltage = -1 # bias voltage, set to -1 to use stored value in slow data.
acq_rate = -1. # the detector acquisition rate, use -1 to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
max_pulses = [0, 352, 1] # range list [start, end, step] of memory cell indices to be processed within a train. A maximum of 3 list elements is allowed.
mem_cells_db = -1 # set to a value different from -1 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity, no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels to consider a module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered a dark pixel
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold above which a complete ADC is masked
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
# Correction Booleans
only_offset = False # Apply only Offset correction. If False, Offset is applied by default; if True, only Offset is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set very negative and very large values in corrected data to 0
blc_set_min = False # shift negative medium gain pixels to 0 after offset correction
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask an entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
round_photons = False # Round to absolute number of photons, only use with gain corrections
# Optional auxiliary devices
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'online' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
litframe_device_id = '' # Device ID for a lit frame finder device, empty string for auto-detection
energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction
use_xgm_device = '' # DoocsXGM device ID to obtain the actual photon energy; empty string to use the operating-condition value instead.
# Output parameters
recast_image_data = '' # Cast data to a different dtype before saving
compress_fields = ['gain', 'mask'] # Datasets in image group to compress.
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
max_tasks_per_worker = 1 # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
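%% Cell type:markdown id: tags:
The `max_pulses` parameter above is a `[start, end, step]` list of memory-cell indices; the cell selection built later (`CellRange`) interprets it, roughly, like a Python `range`. A minimal illustration (not part of the original notebook, using the default value):
%% Cell type:code id: tags:
``` python
# Illustrative only: [0, 352, 1] selects every memory-cell index from 0 to 351.
start, end, step = [0, 352, 1]
print(len(range(start, end, step)), "cell indices selected")  # 352
```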
%% Cell type:code id: tags:
``` python
import itertools
import os
import math
import multiprocessing
import re
import warnings
from datetime import timedelta
from logging import warning
from pathlib import Path
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
CalibrationMetadata,
calcat_creation_time,
map_modules_from_folder,
module_index_to_qm,
)
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
corr_bools["round_photons"] = round_photons
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules:", ', '.join(module_index_to_qm(x) for x in modules))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
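%% Cell type:markdown id: tags:
As a quick illustration of the mapping logic above (a sketch, not part of the original notebook): aggregator names carry the module index in their last two digits, and `module_index_to_qm` turns an index into a quadrant/module name.
%% Cell type:code id: tags:
``` python
# Illustrative only: round-trip between aggregator names and module indices.
example_da = ["AGIPD{:02d}".format(i) for i in (0, 5, 15)]
print(example_da)                         # ['AGIPD00', 'AGIPD05', 'AGIPD15']
print([int(x[-2:]) for x in example_da])  # [0, 5, 15]
print(module_index_to_qm(0))              # e.g. 'Q1M1'
```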
%% Cell type:code id: tags:
``` python
if use_ppu_device:
# Obtain trains to process if using a pulse picker device.
# Will throw an uncaught exception if the device is wrong.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
train_ids = np.unique(seq_start)[1:] + ppu_train_offset
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# Process all trains.
train_ids = None
print(f'Processing all valid trains')
```
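%% Cell type:markdown id: tags:
A toy illustration of the selection above (assumed values, not real PPU data): the first unique `sequenceStart` value stems from the trigger before this run and is therefore dropped.
%% Cell type:code id: tags:
``` python
# Illustrative only: mimic trainTrigger.sequenceStart values for two picked sequences.
seq_start_example = np.array([100, 100, 200, 200, 200, 300])
print(np.unique(seq_start_example)[1:])  # [200 300] -> picked trains, before ppu_train_offset
```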
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
mod_channel = int(re.findall(rf".*{first_mod_channel}CH([0-9]+):.*", instrument_src_mod)[0])
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}")
if acq_rate == -1.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == -1:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == -1:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
if mem_cells is None:
raise ValueError(f"No raw images found for {instrument_src_mod}")
mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: 9.2")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_finder != 'off':
from extra_redu import make_litframe_finder, LitFrameFinderError
if use_litframe_finder not in ['auto', 'offline', 'online']:
raise ValueError("Unexpected value in 'use_litframe_finder'.")
inst = karabo_id_control[:3]
litfrm = make_litframe_finder(inst, dc, litframe_device_id)
try:
get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
r = get_data[use_litframe_finder]()
cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
cell_sel.print_report()
except LitFrameFinderError as err:
warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:code id: tags:
``` python
if round_photons and photon_energy <= 0.0:
if use_xgm_device:
# Try to obtain photon energy from XGM device.
wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']
try:
from scipy.constants import h, c, e
# Read wavelength as a single value and convert to hv.
photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
except ValueError:
warning('XGM source available but photon energy varies by more than 1%, '
'photon rounding disabled!')
round_photons = False
else:
warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
round_photons = False
elif round_photons:
print(f'Photon energy for rounding: {photon_energy:.3f} keV')
```
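%% Cell type:markdown id: tags:
A short sanity check on the conversion above (illustrative, with an assumed wavelength): since $hc/e \approx 1.24 \times 10^{-6}$ eV·m, dividing by the wavelength in nm times $10^{-6}$ yields the photon energy directly in keV.
%% Cell type:code id: tags:
``` python
# Illustrative only: verify the nm -> keV conversion used above.
from scipy.constants import c, e, h

wavelength_nm = 0.1348  # assumed example value
print(f"{(h * c / e) / (wavelength_nm * 1e-6):.3f} keV")  # ~9.198 keV
```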
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
agipd_corr.photon_energy = photon_energy
agipd_corr.compress_fields = compress_fields
if recast_image_data:
agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
```
%% Cell type:code id: tags:
``` python
module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
k_da = module_index_to_karabo_da[mod]
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
print(f"Found constants for {k_da} in calibration_metadata.yml")
else:
try:
# TODO: replace with proper retrieval (as done in pre-correction)
when = agipd_corr.initialize_from_db(
karabo_id=karabo_id,
karabo_da=k_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
memory_cells=mem_cells_db,
bias_voltage=bias_voltage,
photon_energy=9.2,
gain_setting=gain_setting,
acquisition_rate=acq_rate,
integration_time=integration_time,
module_idx=mod,
only_dark=False,
)
print(f"Queried CalCat for {k_da}")
except Exception as e:
warning(f"Module: {k_da}, {e}")
when = None
return mod, when, k_da
print(f'Preparing constants (FF: {agipd_corr.corr_bools.get("xray_corr", False)}, PC: {any(agipd_corr.pc_bools)}, '
f'BLC: {any(agipd_corr.blc_bools)})')
ts = perf_counter()
with multiprocessing.Pool(processes=len(modules)) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
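%% Cell type:markdown id: tags:
A quick usage sketch for `batches` (illustrative values, not part of the original notebook):
%% Cell type:code id: tags:
``` python
# Illustrative only: five file slots grouped into batches of two.
print(list(batches(["f0", "f1", "f2", "f3", "f4"], 2)))
# [['f0', 'f1'], ['f2', 'f3'], ['f4']]
```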
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
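%% Cell type:markdown id: tags:
A usage sketch for `imagewise_chunks` (illustrative counts; it reads the notebook's `chunk_size`, 1000 by default): a slot holding 2500 images yields three nearly equal chunks.
%% Cell type:code id: tags:
``` python
# Illustrative only: chunk boundaries for one file slot holding 2500 images.
print(list(imagewise_chunks([2500])))
# [(0, 0, 833), (0, 833, 1666), (0, 1666, 2500)]
```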
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
if max_tasks_per_worker == -1:
max_tasks_per_worker = None
with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
step_timer.done_step(f'Loading data from files')
if not any(img_counts):
# Skip any further processing and output if there are no images to
# correct in this batch.
continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# If common mode correction is enabled.
# Cell selection is only activated after common mode correction.
# Perform cross-file correction parallel over asics
image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
pool.starmap(agipd_corr.cm_correction, itertools.product(
image_files_idx, range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise gain correction
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# If the yml file contains "retrieved-constants", a leading
# notebook was processed and the report is generated from it.
fst_print = True
timestamps = {}
for i, (modno, when, k_da) in enumerate(const_out):
qm = module_index_to_qm(modno)
if k_da not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
module_timestamps = {}
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store a few timestamps if they exist
# Add NA to keep the array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
module_timestamps[key] = when[key]
else:
module_timestamps[key] = "NA"
timestamps[qm] = module_timestamps
seq = sequences[0] if sequences else 0
if timestamps:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
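%% Cell type:markdown id: tags:
For reference, the YAML written above has the following shape (timestamps made up for illustration):
%% Cell type:code id: tags:
``` python
# Illustrative only: shape of retrieved_constants_s<seq>.yml (made-up values).
example = {"time-summary": {"S0": {"Q1M1": {
    "Offset": "22-06-28 13:00", "SlopesPC": "NA", "SlopesFF": "NA"}}}}
print(yaml.safe_dump(example))
```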
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
"""Load a single train for all modules
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train id to be loaded. The first full train is used if None is given
:param modules: Number of detector modules to stack the data for
:param fillvalue: Value to use for missing module data
"""
run_data = RunDirectory(data_folder, include)
if tid is not None:
tid, data = run_data.select(
f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
else:
# A first full trainId for all available modules is of interest.
tid, data = next(run_data.select(
f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules)
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows signal vs. digitized gain
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
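%% Cell type:markdown id: tags:
Since a pixel can carry several of these flags at once, the stored mask value is the bitwise OR of the individual entries. A small decoding sketch (illustrative only; flag names taken from the `BadPixels` enum listed above):
%% Cell type:code id: tags:
``` python
# Illustrative only: decode a combined mask value back into its flags.
example_value = BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value
print([item.name for item in BadPixels if item.value & example_value])
```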
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:code id: tags:
``` python
if round_photons:
display(Markdown('### Photonization histograms ###'))
x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)
fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
ax.set_yscale('log')
ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
ax.set_xlim(x_postround[0], x_postround[-1]+1)
ax.set_xlabel('Photon energy / keV')
ax.set_ylabel('Intensity')
ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')
phx = ax.twiny()
phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
phx.set_xticks(x_photons)
phx.set_xlabel('# Photons')
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
%% Cell type:markdown id: tags:
# AGIPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # runs to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
ctrl_source_template = '{}/MDL/FPGA_COMP_TEST' # path to control information
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
receiver_template = "{}CH0" # inset for receiver devices
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
# Parameters for calibration database.
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
mem_cells = -1 # number of memory cells used, set to -1 to automatically infer
bias_voltage = -1 # bias voltage, set to -1 to use stored value in slow data.
acq_rate = -1. # the detector acquisition rate, use -1 to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
integration_time = -1 # integration time, negative values for auto-detection.
# Correction Booleans
only_offset = False # Apply only Offset correction. If False, Offset is applied by default; if True, only Offset is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
```
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_hmatch"] = blc_hmatch
```
%% Cell type:code id: tags:
``` python
import numpy as np
from logging import warning
from pathlib import Path
from typing import Tuple
import multiprocessing
from datetime import timedelta
from dateutil import parser
from extra_data import RunDirectory
from cal_tools.agipdlib import (
AgipdCtrl,
SnowResolution,
assemble_constant_dict,
)
from cal_tools.enums import AgipdGainMode
from cal_tools.tools import (
calcat_creation_time,
get_from_db,
module_index_to_qm,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants, Detectors
```
%% Cell type:code id: tags:
``` python
# slopes_ff_from_files left as str for now
in_folder = Path(in_folder)
out_folder = Path(out_folder)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}")
print(f"Outputting to {out_folder}")
out_folder.mkdir(parents=True, exist_ok=True)
melt_snow = False if corr_bools["only_offset"] else SnowResolution.NONE
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
print(f"Detector in use is {karabo_id}")
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
# Evaluate detector instance for mapping
if instrument == "SPB":
nmods = 16
elif instrument == "MID":
nmods = 16
elif instrument == "HED":
nmods = 8
print(f"Instrument {instrument}")
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}")
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
instr_dc = run_dc.select(instrument_src.format("*"))
for m in modules:
    # Remove empty sources from `instr_dc`
    if instr_dc[instrument_src.format(m), 'image.data'].shape[0] == 0:
        instr_dc = instr_dc.deselect(instrument_src.format(m))
if not instr_dc.all_sources:
    raise ValueError(f"No images found for {in_folder / f'r{run:04d}'}")
```
%% Cell type:code id: tags:
``` python
agipd_cond = AgipdCtrl(
run_dc=run_dc,
image_src=None, # Not needed, as we won't read mem_cells or acq_rate.
ctrl_src=ctrl_src,
)
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == -1:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:markdown id: tags:
## Retrieve Constants ##
%% Cell type:code id: tags:
``` python
pc_bools = [ # flags that point to the need for retrieving SlopesPC and BadPixelsPC constants.
corr_bools.get("rel_gain"),
corr_bools.get("adjust_mg_baseline"),
corr_bools.get('blc_noise'),
corr_bools.get('blc_hmatch'),
corr_bools.get('blc_stripes'),
melt_snow,
]
```
%% Cell type:code id: tags:
``` python
def retrieve_constants(
k_da: str, idx: int
) -> Tuple[dict, str, float, int]:
"""
Retrieve constants for a module.
:return:
mdata_dict: dictionary with the metadata for the retrieved constants.
k_da: karabo data aggregator.
acq_rate: acquisition rate parameter.
mem_cells: number of memory cells.
"""
# check if this module has images to process.
if instrument_src.format(idx) not in instr_dc.all_sources:
print("ERROR: No raw images found for "
f"{module_index_to_qm(idx)}({k_da}).")
return None, k_da, None, None
agipd_cond.image_src = instrument_src.format(idx)
if mem_cells == -1:
# Read value from fast data.
local_mem_cells = agipd_cond.get_num_cells()
else:
# or use overriding notebook parameter.
local_mem_cells = mem_cells
if acq_rate == -1.:
local_acq_rate = agipd_cond.get_acq_rate()
else:
local_acq_rate = acq_rate
const_dict = assemble_constant_dict(
corr_bools,
pc_bools,
local_mem_cells,
bias_voltage,
gain_setting,
local_acq_rate,
photon_energy=9.2,
gain_mode=gain_mode,
beam_energy=None,
only_dark=False,
integration_time=integration_time
)
# Retrieve multiple constants through an input dictionary
# to return a dict of useful metadata.
mdata_dict = dict()
mdata_dict["constants"] = dict()
mdata_dict["physical-detector-unit"] = None # initialization
for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items(): # noqa
if gain_mode and const_name in ("ThresholdsDark",):
continue
# saving metadata in a dict
const_mdata = dict()
mdata_dict["constants"][const_name] = const_mdata
if slopes_ff_from_files and const_name in ["SlopesFF", "BadPixelsFF"]:
const_mdata["file-path"] = (
f"{slopes_ff_from_files}/slopesff_bpmask_module_{module_index_to_qm(idx)}.h5") # noqa
const_mdata["creation-time"] = "00:00:00"
continue
if gain_mode and const_name in (
"BadPixelsPC", "SlopesPC", "BadPixelsFF", "SlopesFF"
):
param_copy = cond_param.copy()
del param_copy["gain_mode"]
condition = getattr(Conditions, cond_type).AGIPD(**param_copy)
else:
condition = getattr(Conditions, cond_type).AGIPD(**cond_param)
_, mdata = get_from_db(
karabo_id,
k_da,
getattr(Constants.AGIPD, const_name)(),
condition,
getattr(np, const_init_fun)(const_shape),
cal_db_interface,
creation_time,
meta_only=True,
verbosity=0,
)
mdata_const = mdata.calibration_constant_version
# check if the constant was successfully retrieved.
if mdata.comm_db_success:
const_mdata["file-path"] = (
f"{mdata_const.hdf5path}" f"{mdata_const.filename}"
)
const_mdata["creation-time"] = f"{mdata_const.begin_at}"
mdata_dict["physical-detector-unit"] = mdata_const.device_name
else:
const_mdata["file-path"] = const_dict[const_name][:2]
const_mdata["creation-time"] = None
return mdata_dict, k_da, local_acq_rate, local_mem_cells
```
%% Cell type:code id: tags:
``` python
inp = []
da_to_qm = dict()
for module_index, k_da in zip(modules, karabo_da):
da_to_qm[k_da] = module_index_to_qm(module_index)
if k_da in retrieved_constants:
print(
f"Constant for {k_da} already in calibration_metadata.yml, won't query again.")
continue
inp.append((k_da, module_index))
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=nmods) as pool:
results = pool.starmap(retrieve_constants, inp)
```
%% Cell type:code id: tags:
``` python
acq_rate_mods = []
mem_cells_mods = []
for md_dict, k_da, acq_rate, mem_cells in results:
    if acq_rate is None and mem_cells is None:
        continue
    retrieved_constants[k_da] = md_dict
    mem_cells_mods.append(mem_cells)
    acq_rate_mods.append(acq_rate)

# Validate that mem_cells and acq_rate are the same for all modules.
# TODO: Should a warning be enough?
if len(set(mem_cells_mods)) != 1 or len(set(acq_rate_mods)) != 1:
    print(
        "WARNING: Number of memory cells or "
        "acquisition rate are not identical for all modules.\n"
        f"mem_cells: {mem_cells_mods}.\nacq_rate: {acq_rate_mods}.")
# check if it is requested not to retrieve any constants from the database
print("\nRetrieved constants for modules:",
', '.join([module_index_to_qm(x) for x in modules]))
print(f"Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Gain setting: {gain_setting}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: 9.2")
print("Constant metadata is saved under \"retrieved-constants\" in calibration_metadata.yml\n")
```
%% Cell type:code id: tags:
``` python
print("Using constants with creation times:")
timestamps = {}
for k_da, module_name in da_to_qm.items():
if k_da not in retrieved_constants.keys():
continue
module_timestamps = timestamps[module_name] = {}
module_constants = retrieved_constants[k_da]
print(f"{module_name}:")
for cname, mdata in module_constants["constants"].items():
if hasattr(mdata["creation-time"], 'strftime'):
mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
print(f'{cname:.<12s}', mdata["creation-time"])
for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
if cname in module_constants["constants"]:
module_timestamps[cname] = module_constants["constants"][cname]["creation-time"]
else:
module_timestamps[cname] = "NA"
time_summary = retrieved_constants.setdefault("time-summary", {})
time_summary["SAll"] = timestamps
metadata.save()
```
......
%% Cell type:markdown id:bed7bd15-21d9-4735-82c1-c27c1a5e3346 tags:
# Gotthard2 Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the Gotthard2 Detector
%% Cell type:code id:570322ed-f611-4fd1-b2ec-c12c13d55843 tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202221/p003225/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 50 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per node if notebook executed through xfel-calibrate, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "FXE_XAD_G2XES" # karabo prefix of Gotthard-II devices
karabo_da = ["GH201"] # data aggregators
receiver_template = "RECEIVER" # receiver template used to read INSTRUMENT keys.
control_template = "CONTROL" # control template used to read CONTROL keys.
instrument_source_template = "{}/DET/{}:daqOutput" # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = "{}/DET/{}" # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for calibration database.
cal_db_interface = "tcp://max-exfl016:8016#8025" # the database interface to use.
cal_db_timeout = 180000 # timeout on caldb requests.
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameters affecting corrected data.
constants_file = "" # Use constants in given constant file path. /gpfs/exfel/data/scratch/ahmedk/dont_remove/gotthard2/constants/calibration_constants_GH2.h5
offset_correction = True # apply offset correction. This can be disabled to only apply LUT or apply LUT and gain correction for non-linear differential results.
gain_correction = True # apply gain correction.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameter conditions.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
# Parameters for plotting
skip_plots = False # exit after writing corrected files
pulse_idx_preview = 3 # pulse index to preview. The following even/odd pulse index is used for preview. # TODO: update to pulseId preview.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id:6e9730d8-3908-41d7-abe2-d78e046d5de2 tags:
``` python
import datetime
import warnings
from functools import partial
from logging import warning
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
from extra_data import RunDirectory, H5File
from pathlib import Path
from cal_tools import h5_copy_except
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import CalCatError, GOTTHARD2_CalibrationData
from cal_tools.files import DataFile
from cal_tools.gotthard2 import gotthard2algs, gotthard2lib
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
    get_constant_from_db_and_time,
    get_pdu_from_db,
    calcat_creation_time,
    CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
from XFELDetAna.plotting.heatmap import heatmapPlot
warnings.filterwarnings('ignore')
%matplotlib inline
```
%% Cell type:code id:d7c02c48-4429-42ea-a42e-de45366d7fa3 tags:
``` python
in_folder = Path(in_folder)
run_folder = in_folder / f"r{run:04d}"
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
if not karabo_id_control:
karabo_id_control = karabo_id
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
ctrl_src = ctrl_source_template.format(karabo_id_control, control_template)
print(f"Process modules: {karabo_da} for run {run}")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id:b5eb816e-b5f2-44ce-9907-0273d82341b6 tags:
``` python
# Select only sequence files to process for the selected detector.
if sequences == [-1]:
possible_patterns = list(f"*{mod}*.h5" for mod in karabo_da)
else:
possible_patterns = list(
f"*{mod}-S{s:05d}.h5" for mod in karabo_da for s in sequences
)
run_folder = Path(in_folder / f"r{run:04d}")
seq_files = [
f for f in run_folder.glob("*.h5") if any(f.match(p) for p in possible_patterns)
]
seq_files = sorted(seq_files)
if not seq_files:
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
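%% Cell type:markdown id: tags:
As an illustration of the patterns built above (assumed parameter values, not part of the original notebook):
%% Cell type:code id: tags:
``` python
# Illustrative only: patterns for karabo_da = ["GH201"] and sequences = [0, 1].
print([f"*{mod}-S{s:05d}.h5" for mod in ["GH201"] for s in [0, 1]])
# ['*GH201-S00000.h5', '*GH201-S00001.h5']
```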
%% Cell type:code id:f9a8d1eb-ce6a-4ed0-abf4-4a6029734672 tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id:892172d8 tags:
``` python
# Read slow data
run_dc = RunDirectory(run_folder)
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
if bias_voltage == -1:
bias_voltage = g2ctrl.get_bias_voltage()
if exposure_time == -1:
exposure_time = g2ctrl.get_exposure_time()
if exposure_period == -1:
exposure_period = g2ctrl.get_exposure_period()
if acquisition_rate == -1:
acquisition_rate = g2ctrl.get_acquisition_rate()
if single_photon == -1:
single_photon = g2ctrl.get_single_photon()
print("Bias Voltage:", bias_voltage)
print("Exposure Time:", exposure_time)
print("Exposure Period:", exposure_period)
print("Acquisition Rate:", acquisition_rate)
print("Single Photon:", single_photon)
```
%% Cell type:markdown id:8c852392-bb19-4c40-b2ce-3b787538a92d tags:
### Retrieving calibration constants
%% Cell type:code id:5717d722 tags:
``` python
da_to_pdu = {}
# Used for old FXE (p003225) runs before adding Gotthard2 to CALCAT.
const_data = dict()
if constants_file:
    for mod in karabo_da:
        const_data[mod] = dict()
        # Load constants temporarily using the defined local path.
        with h5py.File(constants_file, "r") as cfile:
            const_data[mod]["LUTGotthard2"] = cfile["LUT"][()]
            const_data[mod]["OffsetGotthard2"] = cfile["offset_map"][()].astype(np.float32)
            const_data[mod]["RelativeGainGotthard2"] = cfile["gain_map"][()].astype(np.float32)
            const_data[mod]["Mask"] = cfile["bpix_ff"][()].astype(np.uint32)
```
%% Cell type:code id:1cdbe818 tags:
``` python
g2_cal = GOTTHARD2_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    exposure_time=exposure_time,
    exposure_period=exposure_period,
    acquisition_rate=acquisition_rate,
    single_photon=single_photon,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
)
# Keep as long as it is essential to correct
# RAW data (FXE p003225) before the data mapping was added to CALCAT.
try:  # in case local constants are used with old RAW data. This can be removed in the future.
    for mod_info in g2_cal.physical_detector_units.values():
        da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]
    db_modules = [da_to_pdu[da] for da in karabo_da]
except CalCatError as e:
    print(e)
    db_modules = [None] * len(karabo_da)
if not constants_file:
    # Retrieve constants from CALCAT.
    constant_names = ["LUTGotthard2", "OffsetGotthard2", "BadPixelsDarkGotthard2"]
    if gain_correction:
        constant_names += ["RelativeGainGotthard2", "BadPixelsFFGotthard2"]
    # Retrieve data for all Gotthard2 constants.
    const_data = g2_cal.ndarray_map(constant_names)

    # Validate the constants availability and raise/warn correspondingly.
    for mod, calibrations in const_data.items():
        dark_constants = {"LUTGotthard2"}
        if offset_correction:
            dark_constants |= {"OffsetGotthard2", "BadPixelsDarkGotthard2"}
        missing_dark_constants = dark_constants - set(calibrations)
        if missing_dark_constants:
            karabo_da.remove(mod)
            warning(f"Dark constants {missing_dark_constants} are not available to correct {mod}.")  # noqa
        missing_gain_constants = {
            "BadPixelsFFGotthard2", "RelativeGainGotthard2"} - set(calibrations)
        if gain_correction and missing_gain_constants:
            warning(f"Gain constants {missing_gain_constants} are not retrieved for {mod}. "
                    "Gain correction is disabled for this module.")
        # Create the mask array.
        bpix = const_data[mod].get("BadPixelsDarkGotthard2")
        if bpix is None:
            bpix = np.zeros((1280, 2, 3), dtype=np.uint32)
        if const_data[mod].get("BadPixelsFFGotthard2") is not None:
            bpix |= const_data[mod]["BadPixelsFFGotthard2"]
        const_data[mod]["Mask"] = bpix
        # Prepare empty arrays for missing constants.
        if const_data[mod].get("OffsetGotthard2") is None:
            const_data[mod]["OffsetGotthard2"] = np.zeros(
                (1280, 2, 3), dtype=np.float32)
        if const_data[mod].get("RelativeGainGotthard2") is None:
            const_data[mod]["RelativeGainGotthard2"] = np.ones(
                (1280, 2, 3), dtype=np.float32)
        const_data[mod]["RelativeGainGotthard2"] = const_data[mod]["RelativeGainGotthard2"].astype(  # noqa
            np.float32, copy=False)  # Old gain constants are not float32.

if not karabo_da:
    raise ValueError("Dark constants are not available for all modules.")
```
%% Cell type:code id:23fcf7f4-351a-4df7-8829-d8497d94fecc tags:
``` python
context = psh.ProcessContext(num_workers=23)
```
%% Cell type:code id:daecd662-26d2-4cb8-aa70-383a579cf9f9 tags:
``` python
def correct_train(wid, index, d):
g = gain[index]
gotthard2algs.convert_to_10bit(d, const_data[mod]["LUT"], data_corr[index, ...])
gotthard2algs.convert_to_10bit(d, const_data[mod]["LUTGotthard2"], data_corr[index, ...])
gotthard2algs.correct_train(
data_corr[index, ...],
mask[index, ...],
g,
const_data[mod]["Offset"],
const_data[mod]["RelativeGain"].astype(np.float32, copy=False),
const_data[mod]["OffsetGotthard2"],
const_data[mod]["RelativeGainGotthard2"],
const_data[mod]["Mask"],
apply_offset=offset_correction,
apply_gain=gain_correction,
)
```
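%% Cell type:markdown id: tags:
`gotthard2algs.convert_to_10bit` used above performs a per-strip lookup-table conversion of the raw 12-bit codes before offset and gain are applied. A rough numpy equivalent is sketched below for illustration only; it assumes the LUT's middle axis indexes even/odd frames, matching the even/odd pulse handling in the preview plots further down.
%% Cell type:code id: tags:
``` python
# Rough numpy sketch of the LUT conversion (assumption: LUT axis 1 is even/odd).
import numpy as np

def convert_to_10bit_np(raw, lut):
    """raw: (frames, 1280) uint16 12-bit codes; lut: (1280, 2, 4096)."""
    parity = (np.arange(raw.shape[0]) % 2)[:, None]  # even/odd frame index
    strips = np.arange(raw.shape[1])[None, :]        # strip index
    return lut[strips, parity, raw].astype(np.float32)
```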
%% Cell type:code id:f88c1aa6-a735-4b72-adce-b30162f5daea tags:
``` python
for mod in karabo_da:
# This is used in case receiver template consists of
# karabo data aggregator index. e.g. detector at DETLAB
instr_mod_src = instrument_src.format(mod[-2:])
data_path = "INSTRUMENT/" + instr_mod_src + "/data"
for raw_file in seq_files:
step_timer.start()
dc = H5File(raw_file)
out_file = out_folder / raw_file.name.replace("RAW", "CORR")
# Select module INSTRUMENT source and deselect empty trains.
dc = dc.select(instr_mod_src, require_all=True)
data = dc[instr_mod_src, "data.adc"].ndarray()
gain = dc[instr_mod_src, "data.gain"].ndarray()
step_timer.done_step("preparing raw data")
dshape = data.shape
step_timer.start()
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask = context.alloc(shape=dshape, dtype=np.uint32)
context.map(correct_train, data)
step_timer.done_step("Correcting one sequence file")
step_timer.start()
# Provided PSI gain map has 0 values. Set inf values to nan.
# TODO: This can maybe be removed after creating XFEL gain maps.?
data_corr[np.isinf(data_corr)] = np.nan
        # Create CORR files and add corrected data sections.
        image_counts = dc[instr_mod_src, "data.adc"].data_counts(labelled=False)
        with DataFile(out_file, "w") as ofile:
            # Create INDEX datasets.
            ofile.create_index(dc.train_ids, from_file=dc.files[0])
            # Create METADATA datasets.
            ofile.create_metadata(
                like=dc,
                sequence=dc.run_metadata()["sequenceNumber"],
                instrument_channels=(f"{instr_mod_src}/data",)
            )
            # Create Instrument section to later add corrected datasets.
            outp_source = ofile.create_instrument_source(instr_mod_src)
            # Create count/first datasets at INDEX source.
            outp_source.create_index(data=image_counts)
            # Store uncorrected trainId in the corrected file.
            outp_source.create_key(
                "data.trainId", data=dc.train_ids,
                chunks=min(50, len(dc.train_ids))
            )
            # Create datasets with the available corrected data.
            for field_name, field_data in {
                "adc": data_corr,
                "gain": gain,
            }.items():
                outp_source.create_key(
                    f"data.{field_name}", data=field_data,
                    chunks=((chunks_data,) + data_corr.shape[1:])
                )
            # Copy unmodified per-frame datasets for the corrected trains.
            for field in ["bunchId", "memoryCell", "frameNumber", "timestamp"]:
                outp_source.create_key(
                    f"data.{field}", data=dc[instr_mod_src, f"data.{field}"].ndarray(),
                    chunks=(chunks_data, data_corr.shape[1])
                )
            outp_source.create_compressed_key("data.mask", data=mask)
        step_timer.done_step("Storing data")
```
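%% Cell type:markdown id: tags:
An optional sanity check (not part of the original notebook): re-open the last CORR file written by the loop above and confirm the corrected source is readable.
%% Cell type:code id: tags:
``` python
# Sketch: read back one corrected key to confirm the output file structure.
from extra_data import H5File

dc_check = H5File(out_file)
print(dc_check[instr_mod_src, "data.adc"].shape)
```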
%% Cell type:code id:94b8e4d2-9f8c-4c23-a509-39238dd8435c tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id:0ccc7f7e-2a3f-4ac0-b854-7d505410d2fd tags:
``` python
if skip_plots:
print("Skipping plots")
import sys
sys.exit(0)
```
%% Cell type:code id:ff203f77-3811-46f3-bf7d-226d2dcab13f tags:
``` python
mod_dcs = {}
first_seq_raw = seq_files[0]
first_seq_corr = out_folder / first_seq_raw.name.replace("RAW", "CORR")
for mod in karabo_da:
mod_dcs[mod] = {}
with H5File(first_seq_corr) as out_dc:
tid, mod_dcs[mod]["train_corr_data"] = next(
out_dc[instr_mod_src, "data.adc"].trains()
)
with H5File(first_seq_raw) as in_dc:
train_dict = in_dc.train_from_id(tid)[1][instr_mod_src]
mod_dcs[mod]["train_raw_data"] = train_dict["data.adc"]
mod_dcs[mod]["train_raw_gain"] = train_dict["data.gain"]
```
%% Cell type:code id:1b379438-eb1d-42b2-ac83-eb8cf88c46db tags:
``` python
display(Markdown("### Mean RAW and CORRECTED across pulses for one train:"))
display(Markdown(f"Train: {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 10))
raw_data = mod_dcs[mod]["train_raw_data"]
im = ax.plot(np.mean(raw_data, axis=0))
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
fig, ax = plt.subplots(figsize=(20, 10))
corr_data = mod_dcs[mod]["train_corr_data"]
im = ax.plot(np.mean(corr_data, axis=0))
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV. output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
step_timer.done_step("Plotting mean data")
```
%% Cell type:code id:58a6a276 tags:
``` python
display(Markdown(f"### RAW and CORRECTED strips across pulses for train {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
for plt_data, dname in zip(
["train_raw_data", "train_corr_data"], ["RAW", "CORRECTED"]
):
fig, ax = plt.subplots(figsize=(15, 20))
plt.rcParams.update({"font.size": 20})
heatmapPlot(
mod_dcs[mod][plt_data],
y_label="Pulses",
x_label="Strips",
title=f"{dname} module {mod} ({pdu})",
use_axis=ax,
)
pass
step_timer.done_step("Plotting RAW and CORRECTED data for one train")
```
%% Cell type:code id:cd8f5e08-fcee-4bff-ba63-6452b3d892a2 tags:
``` python
# Validate given "pulse_idx_preview"
if pulse_idx_preview + 1 > data.shape[1]:
print(
f"WARNING: selected pulse_idx_preview {pulse_idx_preview} is not available in data."
" Previewing 1st pulse."
)
pulse_idx_preview = 1
if data.shape[1] == 1:
odd_pulse = 1
even_pulse = None
else:
odd_pulse = pulse_idx_preview if pulse_idx_preview % 2 else pulse_idx_preview + 1
even_pulse = (
pulse_idx_preview if not (pulse_idx_preview % 2) else pulse_idx_preview + 1
)
```
%% Cell type:code id:e5f0d4d8-e32c-4f2c-8469-4ebbfd3f644c tags:
``` python
display(Markdown("### RAW and CORRECTED even/odd pulses for one train:"))
display(Markdown(f"Train: {tid}"))
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 20))
raw_data = mod_dcs[mod]["train_raw_data"]
corr_data = mod_dcs[mod]["train_corr_data"]
ax.plot(raw_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(raw_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC RAW", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
fig, ax = plt.subplots(figsize=(20, 20))
ax.plot(corr_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(corr_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV CORRECTED", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
step_timer.done_step("Plotting RAW and CORRECTED odd/even pulses.")
```
......
%% Cell type:markdown id: tags:
# GOTTHARD2 Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the Gotthard2 Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202221/p003225/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 50 # run to process, required
# Parameters used to access raw data.
karabo_id = "FXE_XAD_G2XES" # karabo prefix of Gotthard-II devices
karabo_da = ["GH201"] # data aggregators
receiver_template = "RECEIVER" # receiver template used to read INSTRUMENT keys.
control_template = "CONTROL" # control template used to read CONTROL keys.
instrument_source_template = "{}/DET/{}:daqOutput" # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = "{}/DET/{}" # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for calibration database.
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use.
cal_db_timeout = 180000 # timeout on caldb requests.
overwrite_creation_time = "2022-06-28 13:00:00.00" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. "2022-06-28 13:00:00.00"
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameters affecting corrected data.
constants_file = ""#/gpfs/exfel/data/scratch/ahmedk/dont_remove/gotthard2/constants/calibration_constants_GH2.h5" # Retrieve constants from local.
constants_file = "" # /gpfs/exfel/data/scratch/ahmedk/dont_remove/gotthard2/constants/calibration_constants_GH2.h5" # Retrieve constants from local.
offset_correction = True # apply offset correction. This can be disabled to only apply LUT or apply LUT and gain correction for non-linear differential results.
gain_correction = True # apply gain correction.
# Parameter conditions.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
if constants_file:
print("Skipping constant retrieval. Specified constants_file is used.")
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
from logging import warning
from pathlib import Path

from extra_data import RunDirectory

import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import GOTTHARD2_CalibrationData
from cal_tools.gotthard2 import gotthard2lib
from cal_tools.tools import (
    calcat_creation_time,
    CalibrationMetadata,
)
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
run_folder = in_folder / f"r{run:04d}"
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
if not karabo_id_control:
karabo_id_control = karabo_id
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
ctrl_src = ctrl_source_template.format(karabo_id_control, control_template)
print(f"Retrieve constants for modules: {karabo_da} for run {run}")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id: tags:
``` python
# Read slow data
run_dc = RunDirectory(run_folder)
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
if bias_voltage == -1:
bias_voltage = g2ctrl.get_bias_voltage()
if exposure_time == -1:
exposure_time = g2ctrl.get_exposure_time()
if exposure_period == -1:
exposure_period = g2ctrl.get_exposure_period()
if acquisition_rate == -1:
acquisition_rate = g2ctrl.get_acquisition_rate()
if single_photon == -1:
single_photon = g2ctrl.get_single_photon()
print("Bias Voltage:", bias_voltage)
print("Exposure Time:", exposure_time)
print("Exposure Period:", exposure_period)
print("Acquisition Rate:", acquisition_rate)
print("Single Photon:", single_photon)
```
%% Cell type:code id: tags:
``` python
g2_cal = GOTTHARD2_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    exposure_time=exposure_time,
    exposure_period=exposure_period,
    acquisition_rate=acquisition_rate,
    single_photon=single_photon,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
)

constant_names = ["LUTGotthard2", "OffsetGotthard2", "BadPixelsDarkGotthard2"]
if gain_correction:
    constant_names += ["RelativeGainGotthard2", "BadPixelsFFGotthard2"]

# Retrieve metadata for all Gotthard2 constants.
g2_metadata = g2_cal.metadata(constant_names)

missing_dark_modules = set()
# Validate the constants availability and raise/warn correspondingly.
for mod, ccv_dict in g2_metadata.items():
    dark_constants = {"LUTGotthard2"}
    if offset_correction:
        dark_constants |= {"OffsetGotthard2", "BadPixelsDarkGotthard2"}
    missing_dark_constants = dark_constants - set(ccv_dict)
    if missing_dark_constants:
        warning(f"Dark constants {missing_dark_constants} are not available to correct {mod}")
        missing_dark_modules.add(mod)
    missing_gain_constants = {"BadPixelsFFGotthard2", "RelativeGainGotthard2"} - set(ccv_dict)
    if gain_correction and missing_gain_constants:
        warning(f"Gain constants {missing_gain_constants} are not retrieved for {mod}")

if missing_dark_modules == set(karabo_da):
    raise ValueError("Dark constants are not available for all modules.")

# Add constants metadata in retrieved_constants dict.
for mod, ccv_dict in g2_metadata.items():
    mod_dict = retrieved_constants.setdefault(mod, dict())
    const_dict = mod_dict.setdefault("constants", dict())
    for cname, ccv_metadata in ccv_dict.items():
        const_dict[cname] = {
            "path": str(g2_cal.caldb_root / ccv_metadata["path"]),
            "dataset": ccv_metadata["dataset"],
            "creation-time": ccv_metadata["begin_validity_at"],
            "ccv_id": ccv_metadata["ccv_id"],
        }
    mod_dict["physical-name"] = ccv_metadata["physical_name"]
print(f"Stored retrieved constants in {metadata.filename}")
```
%% Cell type:code id: tags:
``` python
timestamps = dict()

for mod in karabo_da:
    module_timestamps = timestamps[mod] = dict()
    module_constants = retrieved_constants[mod]

    print(f"Module: {mod}:")
    for cname, mdata in module_constants["constants"].items():
        print(f'{cname:.<12s}', mdata["creation-time"])

    for cname in ["OffsetGotthard2", "BadPixelsDarkGotthard2", "RelativeGainGotthard2", "BadPixelsFFGotthard2"]:
        if cname in module_constants["constants"]:
            module_timestamps[cname] = module_constants["constants"][cname]["creation-time"]
        else:
            module_timestamps[cname] = "NA"

time_summary = retrieved_constants.setdefault("time-summary", {})
time_summary["SAll"] = timestamps
metadata.save()
```
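%% Cell type:markdown id: tags:
For orientation, the `retrieved-constants` section written to `calibration_metadata.yml` by the cells above roughly follows the structure sketched below. All values are illustrative placeholders; only the key names are taken from the code above.
%% Cell type:code id: tags:
``` python
# Illustrative structure only; paths, timestamps and IDs are placeholders.
retrieved_constants_example = {
    "GH201": {
        "constants": {
            "OffsetGotthard2": {
                "path": "/gpfs/exfel/d/cal/caldb_store/...",  # file under caldb_root
                "dataset": "/some/dataset/name",
                "creation-time": "2022-06-28T13:00:00+00:00",
                "ccv_id": 12345,
            },
            # ... one entry per retrieved constant ...
        },
        "physical-name": "GOTTHARD2-EXAMPLE-PDU",
    },
    "time-summary": {
        "SAll": {"GH201": {"OffsetGotthard2": "2022-06-28T13:00:00+00:00"}},
    },
}
```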
......
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 91 # run to process, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID, set to empty string if it is the same a karabo-id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
relative_gain = True # do relative gain correction.
strixel_sensor = False # reordering for strixel detector layout.
strixel_double_norm = 2.0 # normalization to use for double-size pixels, only applied for strixel sensors.
limit_trains = 0 # ONLY FOR TESTING. process only first N trains, Use 0 to process all.
chunks_ids = 32 # HDF chunk size for memoryCell and frameNumber.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by value in file, if manual_slow_data is set to True.
mem_cells = -1 # Set mem_cells to -1 to automatically use the value stored in RAW data.
bias_voltage = 180 # will be overwritten by value in file
# Parameters for plotting
skip_plots = False # exit after writing corrected files
plot_trains = 500 # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
cell_id_preview = 15 # cell Id used for preview in single-shot plots
# Parameters for ROI selection and reduction
roi_definitions = [-1] # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import fnmatch
import multiprocessing
import sys
import warnings
from functools import partial
from logging import warning
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import DataCollection, H5File, RunDirectory, by_id, components
from extra_geom import JUNGFRAUGeometry
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.files import DataFile
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
map_seq_files,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
run_dc = RunDirectory(run_folder)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
out_folder.mkdir(parents=True, exist_ok=True)
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
f"Axis numbers given: {roi_definitions[5::6]}")
sys.exit(1)
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
run_folder, karabo_da, sequences)
if not len(mapped_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
for kda, sfiles in mapped_files.items():
for k, f in enumerate(sfiles):
if k == 0:
table.append((fi, kda, k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
if mem_cells < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
else:
memory_cells = mem_cells
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:code id: tags:
``` python
if strixel_sensor:
from cal_tools.jfstrixel import STRIXEL_SHAPE as strixel_frame_shape, double_pixel_indices, to_strixel
Ydouble, Xdouble = double_pixel_indices()
print('Strixel sensor transformation enabled')
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
empty_constants = {
"Offset": np.zeros((512, 1024, memory_cells, 3), dtype=np.float32),
"BadPixelsDark": np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
"RelativeGain": None,
"BadPixelsFF": None,
}
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def get_constants_for_module(karabo_da: str):
""" Get calibration constants for given module of Jungfrau
:return:
offset_map (offset map),
mask (mask of bad pixels),
gain_map (map of relative gain factors),
db_module (name of DB module),
when (dictionary: constant - creation time)
"""
when = dict()
const_data = dict()
if const_yaml:
for cname, mdata in const_yaml[karabo_da]["constants"].items():
const_data[cname] = dict()
when[cname] = mdata["creation-time"]
if when[cname]:
with h5py.File(mdata["file-path"], "r") as cf:
const_data[cname] = np.copy(
cf[f"{mdata['dataset-name']}/data"])
else:
const_data[cname] = empty_constants[cname]
else:
retrieval_function = partial(
get_constant_from_db_and_time,
karabo_id=karabo_id,
karabo_da=karabo_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
for cname, cempty in empty_constants.items():
const_data[cname], when[cname] = retrieval_function(
condition=condition,
constant=getattr(Constants.jungfrau, cname)(),
empty_constant=cempty,
)
offset_map = const_data["Offset"]
mask = const_data["BadPixelsDark"]
gain_map = const_data["RelativeGain"]
mask_ff = const_data["BadPixelsFF"]
# Combine masks
if mask_ff is not None:
mask |= np.moveaxis(mask_ff, 0, 1)
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
mask = np.moveaxis(mask, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
# masking double size pixels
mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE
if gain_map is not None:
if memory_cells > 1:
gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
# add extra empty cell constant
b = np.ones(((1,)+gain_map.shape[1:]))
gain_map = np.concatenate((gain_map, b), axis=0)
else:
gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)
return offset_map, mask, gain_map, karabo_da, when
with multiprocessing.Pool() as pool:
r = pool.map(get_constants_for_module, karabo_da)
# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
print(f'Constants for module {k_da}:')
for const in when:
print(f' {const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
relative_gain = False
constants[k_da] = (offset_map, mask, gain_map)
```
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
d = d.astype(np.float32) # [cells, x, y]
g = gain[index]
# Copy gain over first to keep it at the original 3 for low gain.
if strixel_sensor:
to_strixel(g, out=gain_corr[index, ...])
else:
gain_corr[index, ...] = g
# Jungfrau gains 0[00], 1[01], 3[11]
# Change low gain to 2 for indexing purposes.
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though it is correct to assume that memory cells pattern
can be the same across all trains (for one correction run
taken with one acquisition), it is preferred to not assume
this to account for exceptions that can happen.
"""
m = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # Such images are corrected with the constants of cell 0 instead, which is
        # safe because the modified memory-cell values are not stored in the output.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
mask_cell = mask[m, ...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
d -= offset
# Gain correction
if relative_gain:
if memory_cells > 1:
gain_map_cell = gain_map[m, ...]
else:
gain_map_cell = gain_map
cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
d /= cal
msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))
if strixel_sensor:
to_strixel(d, out=data_corr[index, ...])
data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
to_strixel(msk, out=mask_corr[index, ...])
else:
data_corr[index, ...] = d
mask_corr[index, ...] = msk
```
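%% Cell type:markdown id: tags:
The `np.choose(g, np.moveaxis(const, -1, 0))` idiom above picks, for every pixel, the constant belonging to that pixel's gain stage. A small self-contained illustration of this numpy behaviour:
%% Cell type:code id: tags:
``` python
# Illustration of the gain-indexed selection used in correct_train().
import numpy as np

g = np.array([[0, 1], [2, 0]])                 # gain index per pixel
const = np.arange(2 * 2 * 3).reshape(2, 2, 3)  # constants with shape (x, y, gain)
per_pixel = np.choose(g, np.moveaxis(const, -1, 0))
# per_pixel[i, j] == const[i, j, g[i, j]]
assert (per_pixel == np.take_along_axis(const, g[..., None], axis=-1)[..., 0]).all()
```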
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
"""If ROIs are defined for this karabo_da, reduce them and save to the output file"""
rois_defined = 0
module_no = int(karabo_da[-2:])
params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
rois_source = f'{params_source}:output'
if roi_definitions != [-1]:
# Create Instrument and Control sections to later add datasets.
outp_source = ofile.create_instrument_source(rois_source)
ctrl_source = ofile.create_control_source(params_source)
for i in range(len(roi_definitions) // 6):
roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
if roi_module == module_no:
rois_defined += 1
# Apply the mask and average remaining pixels to 1D
roi_data = data_corr[..., a1:a2, b1:b2].mean(
axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
)
# Add roi corrected datasets
outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)
# Add roi run control datasets.
ctrl_source.create_run_key(f'roi{rois_defined}.region', np.array([[a1, a2, b1, b2]]))
ctrl_source.create_run_key(f'roi{rois_defined}.reduce_axis', np.array([mean_axis]))
if rois_defined:
# Copy the index for the new source
# Create count/first datasets at INDEX source.
ofile.copy(f'INDEX/{karabo_id}/DET/{karabo_da}:daqOutput/data',
f'INDEX/{rois_source}/data')
ntrains = ofile['INDEX/trainId'].shape[0]
ctrl_source.create_index(ntrains)
```
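%% Cell type:markdown id: tags:
As a usage note for `save_reduced_rois` (values below are hypothetical): each group of six numbers in `roi_definitions` selects a module, two slice boundaries per axis, and the axis to average over, so several ROIs can be stacked in one flat list.
%% Cell type:code id: tags:
``` python
# Hypothetical example: two ROIs on module 3 (JNGFR03), both averaged along
# the slow-scan axis (-2). Purely illustrative values.
roi_definitions_example = [
    3, 120, 180, 200, 550, -2,  # module 3, slice 120:180 x 200:550
    3, 300, 360, 200, 550, -2,  # module 3, slice 300:360 x 200:550
]
```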
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
# Loop over modules
empty_seq = 0
corrected_files = []
for local_karabo_da, mapped_files_module in mapped_files.items():
instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
for sequence_file in mapped_files_module:
# Save corrected data in an output file with name
# of corresponding raw sequence file.
ofile_name = sequence_file.name.replace("RAW", "CORR")
out_file = out_folder / ofile_name
corrected_files.append(ofile_name)
# Load sequence file data collection, data.adc keydata,
# the shape for data to later created arrays of the same shape,
# and number of available trains to correct.
seq_dc = H5File(sequence_file)
seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
ishape = seq_dc_adc.shape # input shape.
corr_ntrains = ishape[0] # number of available trains to correct.
all_train_ids = seq_dc_adc.train_ids
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {sequence_file.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
"trains with missing data.")
# For testing, limit corrected trains. i.e. Getting output faster.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")
# Load constants from the constants dictionary.
# These arrays are used by `correct_train()` function
offset_map, mask, gain_map = constants[local_karabo_da]
# Determine total output shape.
if strixel_sensor:
oshape = (*ishape[:-2], *strixel_frame_shape)
else:
oshape = ishape
# Allocate shared arrays for corrected data. Used in `correct_train()`
data_corr = context.alloc(shape=oshape, dtype=np.float32)
gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
mask_corr = context.alloc(shape=oshape, dtype=np.uint32)
step_timer.start()
# Overwrite seq_dc after eliminating empty trains or/and applying limited images.
seq_dc = seq_dc.select(
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
# Load raw images(adc), gain, memcells, and frame numbers.
data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()
# Validate that the selected cell id to preview is available in raw data.
if memory_cells > 1:
# For plotting, assuming that memory cells are sorted the same for all trains.
found_cells = memcells[0] == cell_id_preview
if any(found_cells):
cell_idx_preview = np.where(found_cells)[0][0]
else:
print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
f"Previewing cell `{memcells[0]}`.")
cell_idx_preview = 0
else:
cell_idx_preview = 0
# Correct data per train
context.map(correct_train, data)
step_timer.done_step(f"Correction time.")
step_timer.start()
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)
with DataFile(out_file, 'w') as outp_file:
# Create INDEX datasets.
outp_file.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create Instrument section to later add corrected datasets.
outp_source = outp_file.create_instrument_source(instrument_src_kda)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# RAW memoryCell and frameNumber are not corrected. But we are storing only
# the values for the corrected trains.
outp_source.create_key(
"data.memoryCell", data=memcells,
chunks=(min(chunks_ids, memcells.shape[0]), 1))
outp_source.create_key(
"data.frameNumber", data=frame_number,
chunks=(min(chunks_ids, frame_number.shape[0]), 1))
# Add main corrected `data.adc`` dataset and store corrected data.
outp_source.create_key(
"data.adc", data=data_corr,
chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
outp_source.create_compressed_key(
"data.gain", data=gain_corr)
outp_source.create_compressed_key(
"data.mask", data=mask_corr)
# Temporary hotfix for FXE assuming this dataset is in corrected files.
outp_source.create_key(
"data.trainId", data=seq_dc.train_ids,
chunks=(min(50, len(seq_dc.train_ids))))
save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)
            # Create METADATA datasets.
outp_file.create_metadata(like=seq_dc)
step_timer.done_step(f'Saving data time.')
if empty_seq == sum([len(i) for i in mapped_files.values()]):
warning("No valid trains for RAW data to correct.")
sys.exit(0)
```
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
# Positions are given in pixels
mod_width = (256 * 4) + (2 * 3) # inc. 2px gaps between tiles
mod_height = (256 * 2) + 2
if karabo_id == "SPB_IRDA_JF4M":
# The first 4 modules are rotated 180 degrees relative to the others.
# We pass the bottom, beam-right corner of the module regardless of its
# orientation, requiring a subtraction from the symmetric positions we'd
# otherwise calculate.
x_start, y_start = 1125, 1078
module_pos = [
(x_start - mod_width, y_start - mod_height - (i * (mod_height + 33)))
for i in range(4)
] + [
(-x_start, -y_start + (i * (mod_height + 33))) for i in range(4)
]
orientations = [(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif karabo_id == "FXE_XAD_JF1M":
module_pos = ((-mod_width//2, 33),(-mod_width//2, -mod_height -33))
orientations = [(-1,-1), (1,1)]
else:
module_pos = ((-mod_width//2,-mod_height//2),)
orientations = None
geom = JUNGFRAUGeometry.from_module_positions(module_pos, orientations=orientations, asic_gap=0)
```
%% Cell type:code id: tags:
``` python
first_seq = 0 if sequences == [-1] else sequences[0]
corrected_files = [
out_folder / f for f in fnmatch.filter(corrected_files, f"*{run}*S{first_seq:05d}*")
]
with DataCollection.from_paths(corrected_files) as corr_dc:
# Reading CORR data for plotting.
jf_corr = components.JUNGFRAU(
corr_dc,
detector_name=karabo_id,
).select_trains(np.s_[:plot_trains])
tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))
# Shape = [modules, trains, cells, x, y]
# TODO: Fix the case if not all modules were requested to be corrected.
# For example if only one modules was corrected. An assertion error is expected
# at `geom.plot_data_fast`, while plotting corrected images.
corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...].values
corrected_train = jf_corr_data["data.adc"][
:, cell_idx_preview, ...
    ].values  # lose the train axis.
mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...].values
mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...].values
with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*", _use_voview=False) as raw_dc:
# Reading RAW data for plotting.
jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(
np.s_[:plot_trains]
)
raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...].values
raw_train = (
jf_raw.select_trains(by_id[[tid]])
.get_array("data.adc")[:, 0, cell_idx_preview, ...]
.values
)
gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...].values
gain_train_cells = (
jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...].values
)
```
%% Cell type:code id: tags:
``` python
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time,
)
```
%% Cell type:markdown id: tags:
### Mean RAW Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
raw_mean = np.mean(raw, axis=1)
geom.plot_data_fast(
raw_mean,
ax=ax,
vmin=min(0.75*np.median(raw_mean[raw_mean > 0]), 2000),
vmax=max(1.5*np.median(raw_mean[raw_mean > 0]), 16000),
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
ax.set_title(f'{karabo_id} - Mean RAW', size=18)
plt.show()
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
corrected_mean = np.mean(corrected, axis=1)
_corrected_vmin = min(0.75*np.median(corrected_mean[corrected_mean > 0]), -0.5)
_corrected_vmax = max(2.*np.median(corrected_mean[corrected_mean > 0]), 100)
mean_plot_kwargs = dict(
vmin=_corrected_vmin, vmax=_corrected_vmax, cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(figsize=(18, 10))
corrected_masked = corrected.copy()
corrected_masked[mask != 0] = np.nan
corrected_masked_mean = np.nanmean(corrected_masked, axis=1)
del corrected_masked
if not strixel_sensor:
geom.plot_data_fast(
corrected_masked_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### A single image from train {tid}")))
fig, ax = plt.subplots(figsize=(18, 10))
single_plot_kwargs = dict(
vmin=min(0.75 * np.median(corrected_train[corrected_train > 0]), -0.5),
vmax=max(2.0 * np.median(corrected_train[corrected_train > 0]), 100),
cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_train,
ax=ax,
colorbar={"shrink": 1, "pad": 0.01},
**single_plot_kwargs
)
else:
ax.imshow(corrected_train.squeeze(), aspect=10, **single_plot_kwargs)
ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [
np.min(edges[1]),
np.max(edges[1]),
np.min(edges[0]),
np.max(edges[0]),
]
im = ax.imshow(
data[::-1, :],
extent=extent,
aspect="auto",
norm=LogNorm(vmin=1, vmax=np.max(data))
)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:markdown id: tags:
### Gain Bit Value
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
h, ex, ey = np.histogram2d(
raw[i].flatten(),
gain[i].flatten(),
bins=[100, 4],
range=[[0, 10000], [0, 4]],
)
do_2d_plot(
h,
(ex, ey),
"Signal (ADU)",
"Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
f"Module {mod} ({pdu})",
)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
corrected_flatten = corrected[i].flatten()
for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):
h = ax.hist(
corrected_flatten,
bins=1000,
range=hist_range,
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))
fig, ax = plt.subplots(figsize=(18, 10))
gain_max = np.max(gain_train_cells, axis=(1, 2))
geom.plot_data_fast(
gain_max,
ax=ax,
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
plt.show()
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append(
(item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
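%% Cell type:markdown id: tags:
A minimal sketch (not part of the original notebook) of decoding one 32-bit mask entry back into the `BadPixels` flags it combines:
%% Cell type:code id: tags:
``` python
# Decode an example mask value into its BadPixels flag names.
value = int(mask_train.max())  # some mask entry from the data loaded above
set_flags = [item.name for item in BadPixels if value & item.value]
print(f"{value:#034b} ->", set_flags)
```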
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
display(Markdown(f"#### Bad pixels image for train {tid}"))
fig, ax = plt.subplots(figsize=(18, 10))
if not strixel_sensor:
geom.plot_data_fast(
np.log2(mask_train),
ax=ax,
vmin=0, vmax=32, cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
else:
ax.imshow(np.log2(mask_train).squeeze(), vmin=0, vmax=32, cmap='jet', aspect=10)
plt.show()
```
......
%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction of images acquired with the ePix100 detector.
The sequence of corrections applied is:
Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.
Offset, common mode and gain corrected data is saved to /data/image/pixels in the CORR files.
If pattern classification is applied (charge sharing correction), this data will be saved to /data/image/pixels_classified, while the corresponding patterns will be saved to /data/image/patterns in the CORR files.
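A schematic numpy sketch of that correction order is shown below. It is illustrative only: the notebook itself applies these steps with the XFELDetAna calculators configured further down, and the gain application direction here is an assumption.
%% Cell type:code id: tags:
``` python
# Schematic only: the order of the correction steps, not the real implementation.
import numpy as np

def correct_frame_sketch(raw, offset_map, rel_gain_map, abs_gain=1.0):
    d = raw.astype(np.float32) - offset_map   # 1. Offset
    d -= np.median(d, axis=0, keepdims=True)  # 2. Common mode (crude stand-in)
    d *= rel_gain_map                         # 3. Relative gain (direction assumed)
    # 4. Charge sharing / pattern classification is a separate clustering step.
    return d * abs_gain                       # 5. Absolute gain
```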
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202202/p003121/raw" # input folder, required
out_folder = "" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 156 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
# Only for testing
limit_trains = 0 # ONLY FOR TESTING. Process only first N trains, 0 - process all.
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
fix_temperature = -1 # fixed temperature value in Kelvin, Default value -1 to use the value from files.
gain_photon_energy = 8.048 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
from logging import warning
from sys import exit
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, display
from extra_data import RunDirectory, H5File
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import h5_copy_except
from cal_tools.calcat_interface import EPIX100_CalibrationData
from cal_tools.epix100 import epix100lib
from cal_tools.files import DataFile
from cal_tools.tools import (
calcat_creation_time,
get_dir_creation_date,
get_constant_from_db,
load_specified_constants,
CalibrationMetadata,
)
from cal_tools.step_timing import StepTimer
from iCalibrationDB import (
Conditions,
Constants,
)
warnings.filterwarnings('ignore')
prettyPlotting = True
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100
if absolute_gain:
relative_gain = True
plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
run_folder = in_folder / f"r{run:04d}"
instrument_src = instrument_source_template.format(
karabo_id, receiver_template)
print(f"Correcting run: {run_folder}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Data corrected files are stored at: {out_folder}")
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml.
# NOTE: this notebook shouldn't overwrite calibration metadata file.
const_yaml = metadata.get("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
# If a set of sequences requested to correct,
# adapt seq_files list.
if sequences != [-1]:
seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]
if not len(seq_files):
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
sensorSize = [x, y]
# Sensor area will be analysed according to blocksize
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1 # ePIX has no memory cells
run_parallel = False
# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dc,
instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
instrument_src=instrument_src,
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ""
else:
integration_time_str_add = "(manual input)"
if fix_temperature < 0:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ""
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = "(manual input)"
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
md = display(Latex(tabulate.tabulate(
table,
tablefmt='latex',
headers=["#", "file"]
)))
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, dark maps have to be loaded.
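%% Cell type:markdown id: tags:
The dark maps enter the correction in two ways: `OffsetEPix100` is subtracted from every frame, while `NoiseEPix100` provides the per-pixel sigma used by the common-mode and pattern-classification steps further below. The following is a minimal sketch of that idea on synthetic arrays (random stand-ins shaped like the ePix100 sensor, not real constants):
%% Cell type:code id: tags:
``` python
# Minimal sketch (synthetic stand-ins, not retrieved constants).
import numpy as np

rng = np.random.default_rng(seed=0)
raw = rng.normal(loc=1000, scale=5, size=(708, 768)).astype(np.float32)  # one raw frame
offset_map = np.full((708, 768), 1000, dtype=np.float32)  # stand-in for OffsetEPix100
noise_map = np.full((708, 768), 5, dtype=np.float32)      # stand-in for NoiseEPix100

corrected = raw - offset_map             # offset correction removes the pedestal
signal_mask = corrected > 4 * noise_map  # pixels significantly above the noise floor
print(corrected.mean(), int(signal_mask.sum()))
```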
%% Cell type:code id: tags:
``` python
constant_names = ["OffsetEPix100", "NoiseEPix100"]
if relative_gain:
    constant_names += ["RelativeGainEPix100"]
```
%% Cell type:code id: tags:
``` python
if const_yaml:  # Used while reproducing corrected data.
    print(f"Using stored constants in {metadata.filename}")
    when = dict()
    const_data = dict()
    for cname, mdata in const_yaml[karabo_da]["constants"].items():
        when[cname] = mdata["creation-time"]
        if when[cname]:
            with h5py.File(mdata["path"], "r") as cf:
                const_data[cname] = np.copy(
                    cf[f"{mdata['dataset']}/data"])
else:  # First correction attempt: retrieve the constants from CALCAT.
    epix_cal = EPIX100_CalibrationData(
        detector_name=karabo_id,
        sensor_bias_voltage=bias_voltage,
        integration_time=integration_time,
        sensor_temperature=temperature_k,
        in_vacuum=in_vacuum,
        source_energy=gain_photon_energy,
        event_at=creation_time,
        client=rest_cfg.calibration_client(),
    )
    const_data = epix_cal.ndarray_map(calibrations=constant_names)[karabo_da]

# Validate the constants' availability and raise/warn accordingly.
missing_dark_constants = {"OffsetEPix100", "NoiseEPix100"} - set(const_data)
if missing_dark_constants:
    raise ValueError(
        f"Dark constants {missing_dark_constants} are not available to correct {karabo_da}. "
        "No correction is performed!")

if relative_gain and "RelativeGainEPix100" not in const_data:
    warning("RelativeGainEPix100 is not found in the calibration database.")
    relative_gain = False
    absolute_gain = False
```
%% Cell type:code id: tags:
``` python
# Initializing some parameters.
hscale = 1
stats = True
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]
commonModeBlockSize = [x//2, y//2]
```
%% Cell type:code id: tags:
``` python
histCalOffsetCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
# *****************Histogram Calculators****************** #
histCalCor = xcal.HistogramCalculator(
sensorSize,
bins=1050,
range=[-50, 1000],
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if common_mode:
    histCalCMCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )
    cmCorrectionB = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='block',
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionR = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='row',
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionC = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='col',
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
```
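%% Cell type:markdown id: tags:
The three calculators above differ only in their `orientation`: the common mode is estimated and subtracted per block, per row, or per column. Below is a minimal sketch of the row-oriented idea, assuming the common mode is estimated as a per-row median; the actual `xcal.CommonModeCorrection` additionally applies the `minFrac` and `noiseSigma` criteria.
%% Cell type:code id: tags:
``` python
# Minimal sketch (synthetic data): row-wise common-mode removal via a median.
import numpy as np

rng = np.random.default_rng(seed=1)
frame = rng.normal(0, 5, size=(708, 768)).astype(np.float32)
frame += rng.normal(0, 20, size=(708, 1)).astype(np.float32)  # inject per-row baseline shifts

row_cm = np.median(frame, axis=1, keepdims=True)  # per-row baseline estimate
frame_cm_corrected = frame - row_cm
print(frame.std(), frame_cm_corrected.std())  # the spread shrinks after CM removal
```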
%% Cell type:code id: tags:
``` python
if relative_gain:
    gain_cnst = np.median(const_data["RelativeGainEPix100"])
    hscale = gain_cnst
    plot_unit = 'keV'
    if photon_energy > 0:
        plot_unit = r'$\gamma$'
        hscale /= photon_energy

    gainCorrection = xcal.RelativeGainCorrection(
        sensorSize,
        gain_cnst/const_data["RelativeGainEPix100"][..., None],
        nCells=memoryCells,
        parallel=run_parallel,
        blockSize=blockSize,
        gains=None,
    )
    histCalRelGainCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )

if absolute_gain:
    histCalAbsGainCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )
```
%% Cell type:code id: tags:
``` python
if pattern_classification:
    patternClassifier = xcal.PatternClassifier(
        [x, y],
        const_data["NoiseEPix100"],
        split_evt_primary_threshold,
        split_evt_secondary_threshold,
        split_evt_mip_threshold,
        tagFirstSingles=0,
        nCells=memoryCells,
        allowElongated=False,
        blockSize=[x, y],
        parallel=run_parallel,
    )
    histCalCSCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )
    histCalGainCorClusters = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )
    histCalGainCorSingles = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )
```
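%% Cell type:markdown id: tags:
The classifier thresholds are expressed in units of the per-pixel noise: `split_evt_primary_threshold` selects seed pixels, `split_evt_secondary_threshold` collects neighbouring pixels that may share charge with a seed, and `split_evt_mip_threshold` rejects very large (MIP-like) deposits. A minimal sketch of the seed/neighbour selection on a toy frame follows; the real `xcal.PatternClassifier` additionally assigns pattern codes (e.g. 100 for singles).
%% Cell type:code id: tags:
``` python
# Minimal sketch (toy data): noise-scaled seed and neighbour thresholds.
import numpy as np

noise = np.full((5, 5), 5.0, dtype=np.float32)
frame = np.zeros((5, 5), dtype=np.float32)
frame[2, 2] = 60.0  # seed pixel, above the primary threshold
frame[2, 3] = 20.0  # neighbour, above the secondary but below the primary threshold

primary, secondary = 4.0, 3.0
seeds = frame > primary * noise         # candidate event centres
neighbours = frame > secondary * noise  # pixels eligible to join a seed's cluster
print(np.argwhere(seeds).tolist(), np.argwhere(neighbours).tolist())
```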
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
    d = d[..., np.newaxis].astype(np.float32)
    d = np.compress(
        np.any(d > 0, axis=(0, 1)), d, axis=2)

    # Offset correction.
    d -= const_data["OffsetEPix100"]
    histCalOffsetCor.fill(d)

    # Common mode correction.
    if common_mode:
        # Block CM
        d = cmCorrectionB.correct(d)
        # Row CM
        d = cmCorrectionR.correct(d)
        # Column CM
        d = cmCorrectionC.correct(d)
        histCalCMCor.fill(d)

    # Relative gain correction.
    if relative_gain:
        d = gainCorrection.correct(d)
        histCalRelGainCor.fill(d)

    """The gain correction currently applies an absolute
    correction (not a relative correction, as the name implies);
    it changes the scale (the unit of measurement) of the data
    from ADU to either keV or number of photons. But the pattern
    classification relies on comparing the data with the
    NoiseEPix100 map, which is still in ADU.
    The best solution is to do a relative gain correction first
    and apply the global absolute gain to the data at the end,
    after clustering.
    """
    if pattern_classification:
        d_clu, patterns = patternClassifier.classify(d)
        d_clu[d_clu < (split_evt_primary_threshold*const_data["NoiseEPix100"])] = 0
        data_clu[index, ...] = np.squeeze(d_clu)
        data_patterns[index, ...] = np.squeeze(patterns)
        histCalCSCor.fill(d_clu)

    # Absolute gain correction:
    # changes data from ADU to keV (or number of photons).
    if absolute_gain:
        d = d * gain_cnst
        if photon_energy > 0:
            d /= photon_energy
        histCalAbsGainCor.fill(d)

        if pattern_classification:
            # Modify pattern classification.
            d_clu = d_clu * gain_cnst
            if photon_energy > 0:
                d_clu /= photon_energy
            data_clu[index, ...] = np.squeeze(d_clu)
            histCalGainCorClusters.fill(d_clu)

            d_sing = d_clu[patterns == 100]  # pattern 100 corresponds to single-photon events
            if len(d_sing):
                histCalGainCorSingles.fill(d_sing)

    data[index, ...] = np.squeeze(d)
    histCalCor.fill(d)
```
%% Cell type:code id: tags:
``` python
# 10 is a number chosen after testing 1 ... 71 parallel threads.
context = psh.context.ThreadContext(num_workers=10)
```
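%% Cell type:markdown id: tags:
`pasha` dispatches `correct_train` once per train: each call receives a worker id, the position of the train in the iterated data and the train's data, and writes its result into shared arrays allocated with `context.alloc`. A minimal, self-contained sketch of the same pattern (dummy data and a dummy kernel, not the notebook's correction function):
%% Cell type:code id: tags:
``` python
# Minimal sketch (dummy kernel): the pasha ThreadContext map pattern.
import numpy as np
import pasha as psh

demo_context = psh.context.ThreadContext(num_workers=4)
inputs = np.arange(8, dtype=np.float32).reshape(8, 1)
out = demo_context.alloc(shape=inputs.shape, dtype=np.float32)

def kernel(wid, index, value):
    # wid: worker id, index: position along the first axis of `inputs`.
    out[index] = value * 2

demo_context.map(kernel, inputs)
print(out.ravel())
```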
%% Cell type:code id: tags:
``` python
empty_seq = 0
for f in seq_files:
    seq_dc = H5File(f)
    # Save corrected data in an output file with the name
    # of the corresponding raw sequence file.
    out_file = out_folder / f.name.replace("RAW", "CORR")

    # Data shape in seq_dc excluding trains with empty images.
    ishape = seq_dc[instrument_src, "data.image.pixels"].shape
    corr_ntrains = ishape[0]
    all_train_ids = seq_dc.train_ids

    # Raise a WARNING if this sequence has no trains to correct.
    # Otherwise, print the number of trains with no data.
    if corr_ntrains == 0:
        warning(f"No trains to correct for {f.name}: "
                "Skipping the processing of this file.")
        empty_seq += 1
        continue
    elif len(all_train_ids) != corr_ntrains:
        print(f"{f.name} has {len(all_train_ids) - corr_ntrains} trains with missing data.")

    # This parameter is only used for testing.
    if limit_trains > 0:
        print(f"\nCorrected trains are limited to: {limit_trains} trains")
        corr_ntrains = min(corr_ntrains, limit_trains)
    oshape = (corr_ntrains, *ishape[1:])

    data = context.alloc(shape=oshape, dtype=np.float32)
    if pattern_classification:
        data_clu = context.alloc(shape=oshape, dtype=np.float32)
        data_patterns = context.alloc(shape=oshape, dtype=np.int32)

    step_timer.start()  # Correct data.
    # Overwrite seq_dc after eliminating empty trains and/or limiting trains.
    seq_dc = seq_dc.select(
        instrument_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
    pixel_data = seq_dc[instrument_src, "data.image.pixels"]
    context.map(correct_train, pixel_data)
    step_timer.done_step(f'Correcting {corr_ntrains} trains.')
step_timer.start() # Write corrected data.
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src, "data.image.pixels"].data_counts(labelled=False)
# Write corrected data.
with DataFile(out_file, "w") as ofile:
dataset_chunk = ((chunk_size_idim,) + oshape[1:]) # e.g. (1, pixels_x, pixels_y)
# Create INDEX datasets.
ofile.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create METADATA datasets.
ofile.create_metadata(
like=seq_dc,
sequence=seq_dc.run_metadata()["sequenceNumber"],
instrument_channels=(f'{instrument_src}/data',)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected RAW image datasets for the corrected trains.
data_raw_fields = [ # /data/
"ambTemp", "analogCurr", "analogInputVolt", "backTemp",
"digitalInputVolt", "guardCurr", "relHumidity", "digitalCurr"
]
for field in data_raw_fields:
field_arr = seq_dc[instrument_src, f"data.{field}"].ndarray()
outp_source.create_key(
f"data.{field}", data=field_arr,
chunks=(chunk_size_idim, *field_arr.shape[1:]))
image_raw_fields = [ # /data/image/
"binning", "bitsPerPixel", "dimTypes", "dims",
"encoding", "flipX", "flipY", "roiOffsets", "rotation",
]
for field in image_raw_fields:
field_arr = seq_dc[instrument_src, f"data.image.{field}"].ndarray()
outp_source.create_key(
f"data.image.{field}", data=field_arr,
chunks=(chunk_size_idim, *field_arr.shape[1:]))
# Add main corrected `data.image.pixels` dataset and store corrected data.
outp_source.create_key(
"data.image.pixels", data=data, chunks=dataset_chunk)
outp_source.create_key(
"data.trainId", data=seq_dc.train_ids, chunks=min(50, len(seq_dc.train_ids)))
if pattern_classification:
    # Add the corrected pattern-classification datasets.
    outp_source.create_key(
        "data.image.pixels_classified", data=data_clu, chunks=dataset_chunk)
    outp_source.create_key(
        "data.image.patterns", data=data_patterns, chunks=dataset_chunk)
step_timer.done_step('Storing data.')
if empty_seq == len(seq_files):
warning("No valid trains for RAW data to correct.")
exit(0)
```
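%% Cell type:markdown id: tags:
The corrected sequence files mirror the raw file layout, so they can be read back with the same `extra_data` tools. A minimal sketch, assuming the loop above produced at least one CORR file in `out_folder` (the glob pattern is illustrative):
%% Cell type:code id: tags:
``` python
# Minimal sketch (illustrative): reading back a corrected sequence file.
corr_files = sorted(out_folder.glob("CORR-*.h5"))
if corr_files:
    corr_dc = H5File(corr_files[0])
    pixels = corr_dc[instrument_src, "data.image.pixels"].ndarray()
    print(pixels.shape, pixels.dtype)
```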
%% Cell type:code id: tags:
``` python
ho, eo, co, so = histCalCor.get()
d = [{
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Total corr.'
}]
ho, eo, co, so = histCalOffsetCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset corr.'
})
if common_mode:
ho, eo, co, so = histCalCMCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'CM corr.'
})
if relative_gain:
ho, eo, co, so = histCalRelGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Relative gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalCSCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Charge sharing corr.'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy (ADU)',
y_label='Number of occurrences', figsize='2col',
y_log=True, x_range=(-50, 500),
legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:
``` python
if absolute_gain:
    d = []
ho, eo, co, so = histCalAbsGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Absolute gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalGainCorClusters.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Charge sharing corr.'
})
ho, eo, co, so = histCalGainCorSingles.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Isolated photons (singles)'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy ({plot_unit})',
y_label='Number of occurrences', figsize='2col',
y_log=True,
x_range=np.array((-50, 500))*hscale,
legend='top-center-frame-2col',
)
plt.grid()
plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
np.nanmedian(data, axis=0),
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
data[0, ...],
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
step_timer.done_step(f'Plotting single shot of corrected data.')
```
......
%% Cell type:markdown id: tags:
# ePix100 retrieve constants precorrection
Author: European XFEL Detector Group, Version: 1.0
The following notebook provides constants for the selected ePix100 modules before executing correction on the selected sequence files.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw" # input folder, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/epix_correct" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
run = 9988 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "MID_EXP_EPIX-1" # Detector Karabo_ID
karabo_da = "EPIX01" # data aggregators
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters for the calibration database.
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on CalibrationDB requests
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
fix_temperature = 290 # fixed temperature value in Kelvin. Set to a negative value to use the value from files.
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
gain_photon_energy = 9.0 # Photon energy used for gain calibration
# Flags to select type of applied corrections.
relative_gain = True # Apply relative gain correction.
```
%% Cell type:code id: tags:
``` python
from logging import warning
import numpy as np
from extra_data import RunDirectory
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import EPIX100_CalibrationData
from cal_tools.epix100 import epix100lib
from cal_tools.tools import (
calcat_creation_time,
get_dir_creation_date,
get_from_db,
save_constant_metadata,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file,
# if it already contains details about which constants to use.
retrieved_constants = metadata.setdefault("retrieved-constants", {})
if karabo_da in retrieved_constants:
print(
f"Constant for {karabo_da} already in {metadata.filename}, won't query again."
)
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")
```
%% Cell type:code id: tags:
``` python
# Read control data.
run_dc = RunDirectory(in_folder / f"r{run:04d}")
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dc,
instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ""
else:
integration_time_str_add = "(manual input)"
if fix_temperature < 0:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ""
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = "(manual input)"
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
epix_cal = EPIX100_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
integration_time=integration_time,
sensor_temperature=temperature_k,
in_vacuum=in_vacuum,
source_energy=gain_photon_energy,
event_at=creation_time,
client=rest_cfg.calibration_client(),
)
mdata_dict = {"constants": dict()}
constant_names = ["OffsetEPix100", "NoiseEPix100"]
if relative_gain:
constant_names += ["RelativeGainEPix100"]
# Retrieve metadata for all epix100 constants.
epix_metadata = epix_cal.metadata(constant_names)[karabo_da]
# Validate the constants availability and raise/warn correspondingly.
missing_dark_constants = {"OffsetEPix100", "NoiseEPix100"} - set(epix_metadata)
if missing_dark_constants:
raise ValueError(
f"Dark constants {missing_dark_constants} are not available to correct {karabo_da}.")
if relative_gain and "RelativeGainEPix100" not in epix_metadata.keys():
warning("RelativeGainEPix100 is not found in CALCAT.")
for cname, ccv_metadata in epix_metadata.items():
mdata_dict["constants"][cname] = {
"path": str(epix_cal.caldb_root / ccv_metadata["path"]),
"dataset": ccv_metadata["dataset"],
"creation-time": ccv_metadata["begin_validity_at"],
"ccv_id": ccv_metadata["ccv_id"],
}
print(f"Retrieved {cname} with creation-time: {ccv_metadata['begin_validity_at']}")
mdata_dict["physical-name"] = ccv_metadata["physical_name"]
retrieved_constants[karabo_da] = mdata_dict
metadata.save()
print(f"Stored retrieved constants in {metadata.filename}")
```
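%% Cell type:markdown id: tags:
Each entry written above records where the constant data lives (`path`, `dataset`) and when it became valid (`creation-time`), so the correction notebook can load the arrays directly from file without querying the database again. A minimal sketch of that read-back, assuming the referenced constant files are accessible:
%% Cell type:code id: tags:
``` python
# Minimal sketch: loading constant arrays from the stored metadata entries.
import h5py
import numpy as np

loaded = {}
for cname, entry in mdata_dict["constants"].items():
    with h5py.File(entry["path"], "r") as cf:
        loaded[cname] = np.copy(cf[f"{entry['dataset']}/data"])
print({cname: arr.shape for cname, arr in loaded.items()})
```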
......
%% Cell type:markdown id: tags:
# pnCCD Data Correction #
Authors: DET Group, Modified by Kiana Setoodehnia - Version 5.0
The following notebook provides offset, common mode, relative gain, split events and pattern classification corrections of images acquired with the pnCCD. This notebook *does not* yet correct for charge transfer inefficiency.
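%% Cell type:markdown id: tags:
Schematically, the per-pixel correction chain applied below is
$$ d_{\mathrm{corr}} = \frac{d_{\mathrm{raw}} - O - \mathrm{CM}}{G_{\mathrm{rel}}} $$
where $O$ is the dark offset map, $\mathrm{CM}$ is the common-mode estimate and $G_{\mathrm{rel}}$ is the relative gain map; the common-mode and gain terms are only applied when the corresponding correction flags are enabled. Pattern classification then regroups split events on the corrected data.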
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SQS/202031/p900166/raw" # input folder
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/pnccd_correct" # output folder
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 347 # which run to read data from
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequences running on the same slurm node.
karabo_da = 'PNCCD01' # data aggregators
karabo_id = "SQS_NQS_PNCCD1MP" # karabo prefix of PNCCD devices
receiver_id = "PNCCD_FMT-0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/CAL/{}:output' # template for data source name, will be filled with karabo_id and receiver_id.
# Parameters affecting data correction.
commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column
commonModeBlockSize = [512, 512] # size of the detector in pixels for common mode calculations
split_evt_primary_threshold = 4. # primary threshold for split event classification in terms of n sigma noise
split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise
saturated_threshold = 32000. # full well capacity in ADU
# Conditions for retrieving calibration constants
fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.
fix_temperature_bot = 0. # fix temperature for bottom sensor in K, set to 0. to use value from slow data.
gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data.
bias_voltage = 0. # the detector's bias voltage. set to 0. to use value from slow data.
integration_time = 70 # detector's integration time
photon_energy = 1.6 # Al fluorescence in keV
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Booleans for selecting corrections to apply.
only_offset = False # Only apply offset.
common_mode = True # Apply common mode correction
relgain = True # Apply relative gain correction
pattern_classification = True # classify split events
# parameters affecting stored output data.
chunk_size_idim = 1 # H5 chunking size of output data
# ONLY FOR TESTING
limit_images = 0 # this parameter is used for limiting the number of images to correct from a sequence file. ONLY FOR TESTING.
limit_trains = 0 # this parameter is used for limiting the number of trains to correct from a sequence file. ONLY FOR TESTING.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
# Here the hierarchy and dependencies of the correction booleans are defined.
corr_bools = {}
corr_bools["only_offset"] = only_offset
# Apply offset only.
if not only_offset:
corr_bools["relgain"] = relgain
corr_bools["common_mode"] = common_mode
corr_bools["pattern_class"] = pattern_classification
```
%% Cell type:code id: tags:
``` python
import datetime
import os
import sys
import warnings
from logging import warning
from pathlib import Path
warnings.filterwarnings('ignore')
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
from IPython.display import Markdown, display
from extra_data import H5File, RunDirectory
from prettytable import PrettyTable
%matplotlib inline
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import pnccdlib
from cal_tools.files import DataFile
from cal_tools.tools import (
calcat_creation_time,
get_dir_creation_date,
get_constant_from_db_and_time,
get_random_db_interface,
load_specified_constants,
CalibrationMetadata,
)
from cal_tools.step_timing import StepTimer
from cal_tools import h5_copy_except
from iCalibrationDB import Conditions, Constants
from iCalibrationDB.detectors import DetectorTypes
```
%% Cell type:code id: tags:
``` python
# Calibration Database Settings, and Some Initial Run Parameters & Paths:
display(Markdown('### Initial Settings and Paths'))
# Sensor size and block size definitions (important for common mode and other corrections):
pixels_x = 1024 # rows of pnCCD in pixels
pixels_y = 1024 # columns of pnCCD in pixels
in_folder = Path(in_folder)
sensorSize = [pixels_x, pixels_y]
# For xcal.HistogramCalculators.
blockSize = [pixels_x//2, pixels_y//2] # sensor area will be analysed according to blockSize.
print(f"pnCCD size is: {pixels_x}x{pixels_y} pixels.")
print(f'Calibration database interface selected: {cal_db_interface}')
# Paths to the data:
instrument_src = instrument_source_template.format(karabo_id, receiver_id)
print(f"Instrument H5File source: {instrument_src}\n")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}", _use_voview=False)
# Output Folder Creation:
os.makedirs(out_folder, exist_ok=True)
# NOTE: this notebook shouldn't overwrite calibration metadata file.
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml
const_yaml = metadata.get("retrieved-constants", {})
# extract control data
step_timer.start()
ctrl_data = pnccdlib.PnccdCtrl(run_dc, karabo_id)
if bias_voltage == 0.:
bias_voltage = ctrl_data.get_bias_voltage()
if gain == -1:
gain = ctrl_data.get_gain()
if fix_temperature_top == 0:
fix_temperature_top = ctrl_data.get_fix_temperature_top()
if fix_temperature_bot == 0:
fix_temperature_bot = ctrl_data.get_fix_temperature_bot()
step_timer.done_step("Reading control parameters.")
# Printing the Parameters Read from the Data File:
display(Markdown('### Detector Parameters'))
print(f"Bias voltage is {bias_voltage:0.1f} V.")
print(f"Detector gain is set to 1/{int(gain)}.")
print(f"Detector integration time is set to {integration_time} ms")
print(f"Top pnCCD sensor is at temperature of {fix_temperature_top:0.2f} K")
print(f"Bottom pnCCD sensor is at temperature of {fix_temperature_bot:0.2f} K")
```
%% Cell type:code id: tags:
``` python
seq_files = []
for f in run_dc.select(instrument_src).files:
fpath = Path(f.filename)
if fpath.match(f"*{karabo_da}*.h5"):
seq_files.append(fpath)
if sequences != [-1]:
seq_files = sorted([f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)])
print(f"Processing a total of {len(seq_files)} sequence files:")
print(*seq_files, sep='\n')
```
%% Cell type:code id: tags:
``` python
gain_k = [k for k, v in pnccdlib.VALID_GAINS.items() if v == gain][0]
if gain_k == 'a':
split_evt_mip_threshold = 1000. # MIP threshold in ADU for event classification (10 times average noise)
# Each xcal.HistogramCalculator requires a total number of bins and a binning range. We define these
# using a dictionary:
# For all xcal histograms:
Hist_Bin_Dict = {
"bins": 35000, # number of bins
"bin_range": [0, 35000]
}
# For the numpy histograms on the last cell of the notebook:
Event_Bin_Dict = {
"event_bins": 1000, # number of bins
"b_range": [0, 35000] # bin range
}
elif gain_k == 'b':
split_evt_mip_threshold = 270. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 10000,
"bin_range": [0, 10000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 10000]
}
elif gain_k == 'c':
split_evt_mip_threshold = 110. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 3000,
"bin_range": [0, 3000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 3000]
}
elif gain_k == 'd':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 1000,
"bin_range": [0, 1000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 1000]
}
elif gain_k == 'e':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 500,
"bin_range": [0, 500]
}
Event_Bin_Dict = {
"event_bins": 500,
"b_range": [0, 500]
}
else:
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 220,
"bin_range": [0, 220]
}
Event_Bin_Dict = {
"event_bins": 220,
"b_range": [0, 220]
}
bins = Hist_Bin_Dict["bins"]
bin_range = Hist_Bin_Dict["bin_range"]
event_bins = Event_Bin_Dict["event_bins"]
b_range = Event_Bin_Dict["b_range"]
```
%% Cell type:markdown id: tags:
As a first step, dark constants have to be retrieved from the calibration database
%% Cell type:code id: tags:
``` python
display(Markdown("### Constants retrieval"))
step_timer.start()
conditions_dict = {
"bias_voltage": bias_voltage,
"integration_time": integration_time,
"gain_setting": gain,
"temperature": fix_temperature_top,
"pixels_x": pixels_x,
"pixels_y": pixels_y,
}
# Dark condition
dark_condition = Conditions.Dark.CCD(**conditions_dict)
# Add photon energy.
conditions_dict.update({"photon_energy": photon_energy})
illum_condition = Conditions.Illuminated.CCD(**conditions_dict)
# A dictionary for initializing constants. {cname: empty constant array}
empty_constants = {
"Offset": np.zeros((pixels_x, pixels_y, 1), dtype=np.float32),
"Noise": np.zeros((pixels_x, pixels_y, 1), dtype=np.float32),
"BadPixelsDark": np.zeros((pixels_x, pixels_y, 1), dtype=np.uint32),
"RelativeGain": np.zeros((pixels_x, pixels_y), dtype=np.float32),
}
if const_yaml: # Used while reproducing corrected data.
print(f"Using stored constants in {metadata.filename}")
constants, when = load_specified_constants(
const_yaml[karabo_da]["constants"], empty_constants
)
else:
constants = dict()
when = dict()
for cname, cempty in empty_constants.items():
# No need for retrieving RelativeGain, if not used for correction.
if not corr_bools.get("relgain") and cname == "RelativeGain":
continue
constants[cname], when[cname] = get_constant_from_db_and_time(
karabo_id,
karabo_da,
constant=getattr(Constants.CCD(DetectorTypes.pnCCD), cname)(),
condition=illum_condition if cname == "RelativeGain" else dark_condition,
empty_constant=cempty,
cal_db_interface=get_random_db_interface(cal_db_interface),
creation_time=creation_time,
)
```
%% Cell type:code id: tags:
``` python
fig = xana.heatmapPlot(constants["Offset"][:,:,0], x_label='Columns', y_label='Rows', lut_label='Offset (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmax=16000,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Offset Map')
fig = xana.heatmapPlot(constants["Noise"][:,:,0], x_label='Columns', y_label='Rows',
lut_label='Corrected Noise (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Noise Map')
fig = xana.heatmapPlot(np.log2(constants["BadPixelsDark"][:,:,0]), x_label='Columns', y_label='Rows',
lut_label='Bad Pixel Value (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Bad Pixels Map')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(constants["RelativeGain"], figsize=(8, 8), x_label='Columns', y_label='Rows',
lut_label='Relative Gain',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0.8, vmax=1.2,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
panel_top_low_lim = 0.5, panel_top_high_lim = 1.5, panel_side_low_lim = 0.5,
panel_side_high_lim = 1.5,
title = f'Relative Gain Map for pnCCD (Gain = 1/{int(gain)})')
step_timer.done_step("Constants retrieval")
```
%% Cell type:code id: tags:
``` python
#************************ Calculators ************************#
if corr_bools.get('common_mode'):
# Common Mode Correction Calculator:
cmCorrection = xcal.CommonModeCorrection([pixels_x, pixels_y],
commonModeBlockSize,
commonModeAxis,
parallel=False, dType=np.float32, stride=1,
noiseMap=constants["Noise"].astype(np.float32), minFrac=0.25)
if corr_bools.get('pattern_class'):
# Pattern Classifier Calculator:
# Left Hemisphere:
patternClassifierLH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["Noise"][:, :pixels_y//2],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=3, # track along y-axis, left to right (see
nCells=1, # split_event.py file in pydetlib/lib/src/
allowElongated=False, # XFELDetAna/algorithms)
blockSize=[pixels_x, pixels_y//2],
parallel=False)
# Right Hemisphere:
patternClassifierRH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["Noise"][:, pixels_y//2:],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=4, # track along y-axis, right to left
nCells=1,
allowElongated=False,
blockSize=[pixels_x, pixels_y//2],
parallel=False)
patternClassifierLH._imagesPerChunk = 1
patternClassifierRH._imagesPerChunk = 1
patternClassifierLH._noisemap = constants["Noise"][:, :pixels_x//2]
patternClassifierRH._noisemap = constants["Noise"][:, pixels_x//2:]
# Setting bad pixels:
patternClassifierLH.setBadPixelMask(constants["BadPixelsDark"][:, :pixels_x//2] != 0)
patternClassifierRH.setBadPixelMask(constants["BadPixelsDark"][:, pixels_x//2:] != 0)
```
%% Cell type:code id: tags:
``` python
#***************** Histogram Calculators ******************#
# Will contain uncorrected data:
histCalRaw = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain offset corrected data:
histCalOffsetCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('common_mode'):
# Will contain common mode corrected data:
histCalCommonModeCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('pattern_class'):
# Will contain split events pattern data:
histCalPcorr = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain singles events data:
histCalPcorrS = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('relgain'):
# Will contain gain corrected data:
histCalGainCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
```
%% Cell type:markdown id: tags:
## Applying corrections to the raw data
%% Cell type:code id: tags:
``` python
def offset_correction(wid, index, d):
"""offset correction.
Equating bad pixels' values to np.nan,
so that the pattern classifier ignores them:
"""
d = d.copy()
# TODO: To clear this up. Is it on purpose to save corrected data with nans?
d[bpix != 0] = np.nan
d -= offset # offset correction
# TODO: to clear this up. why save the badpixels map in the corrected data?
bpix_data[index, ...] = bpix
data[index, ...] = d
def common_mode(wid, index, d):
"""common-mode correction.
Discarding events caused by saturated pixels:
"""
d = np.squeeze(cmCorrection.correct(d, cellTable=np.zeros(pixels_y, np.int32)))
# we equate these values to np.nan so that the pattern classifier ignores them:
d[d >= saturated_threshold] = np.nan
data[index, ...] = d
def gain_correction(wid, index, d):
"""relative gain correction."""
d /= relativegain
data[index, ...] = d
def pattern_classification_correction(wid, index, d):
    """Pattern classification correction.

    Computes the cluster map by dividing the data into left and
    right hemispheres and saves the split-event corrected images.
    """
# pattern classification on corrected data
dataLH, patternsLH = patternClassifierLH.classify(d[:, :pixels_x//2])
dataRH, patternsRH = patternClassifierRH.classify(d[:, pixels_x//2:])
d[:, :pixels_x//2] = np.squeeze(dataLH)
d[:, pixels_x//2:] = np.squeeze(dataRH)
patterns = np.zeros(d.shape, patternsLH.dtype)
patterns[:, :pixels_x//2] = np.squeeze(patternsLH)
patterns[:, pixels_x//2:] = np.squeeze(patternsRH)
d[d < split_evt_primary_threshold*noise] = 0
data[index, ...] = d
ptrn_data[index, ...] = patterns
d[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles
filtered_data[index, ...] = d
```
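%% Cell type:markdown id: tags:
The four kernels above are applied in sequence by `context.map` in the correction loop below. As an end-to-end illustration, here is a minimal sketch of the same chain on a single synthetic frame (random stand-ins for the constants, and a crude row-median in place of `xcal.CommonModeCorrection`):
%% Cell type:code id: tags:
``` python
# Minimal sketch (synthetic frame and constants): the pnCCD correction chain.
import numpy as np

rng = np.random.default_rng(seed=2)
demo_raw = rng.normal(10000, 30, size=(1024, 1024)).astype(np.float32)
demo_offset = np.full((1024, 1024), 10000, dtype=np.float32)
demo_gain = np.full((1024, 1024), 1.02, dtype=np.float32)

demo = demo_raw - demo_offset                   # offset correction
demo -= np.median(demo, axis=1, keepdims=True)  # crude common-mode stand-in
demo /= demo_gain                               # relative gain correction
print(demo.mean(), demo.std())
```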
%% Cell type:code id: tags:
``` python
# 10 is a number chosen after testing 1 ... 71 parallel threads for a node with 72 cpus.
parallel_num_threads = 10
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
data_path = "INSTRUMENT/"+instrument_src+"/data/"
offset = np.squeeze(constants["Offset"])
noise = np.squeeze(constants["Noise"])
bpix = np.squeeze(constants["BadPixelsDark"])
relativegain = constants.get("RelativeGain")
```
%% Cell type:code id: tags:
``` python
def write_datasets(seq_dc, corr_arrays, out_file, instrument_src):
    """
    Create datasets first, then add data.
    This way the metadata is available together at the start of the file,
    so it's quick to see what the file contains.
    """
    # Create CORR files and add corrected data sections.
    image_counts = seq_dc[instrument_src, "data.image"].data_counts(labelled=False)
    dataset_chunk = ((chunk_size_idim,) + corr_arrays["pixels"].shape[1:])  # e.g. (1, pixels_x, pixels_y)

    with DataFile(out_file, 'w') as ofile:
        # Create INDEX datasets.
        ofile.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
        # Create METADATA datasets.
        ofile.create_metadata(
            like=seq_dc,
            sequence=seq_dc.run_metadata()["sequenceNumber"],
            instrument_channels=(f"{instrument_src}/data",)
        )
        # Create Instrument section to later add corrected datasets.
        outp_source = ofile.create_instrument_source(instrument_src)

        # Create count/first datasets at INDEX source.
        outp_source.create_index(data=image_counts)

        # Store uncorrected trainId in the corrected file.
        outp_source.create_key(
            "data.trainId", data=seq_dc.train_ids,
            chunks=min(50, len(seq_dc.train_ids))
        )

        # TODO: the gain dataset is just the RelativeGain constant
        # and it doesn't make sense to write it into corrected data.
        comp_fields = ["gain", "patterns", "pixels_classified"]
        # TODO: to clear this up: why save corrected data
        # in data/pixels rather than data/image.
        for field, data in corr_arrays.items():
            if field in comp_fields:  # Write compressed corrected data.
                outp_source.create_compressed_key(f"data.{field}", data=data)
            else:
                outp_source.create_key(
                    f"data.{field}", data=data,
                    chunks=dataset_chunk
                )
```
%% Cell type:code id: tags:
``` python
# Data corrections and event classifications happen here.
# Also, the corrected data are written to datasets:
empty_seq = 0
for seq_n, seq_f in enumerate(seq_files):
    seq_dc = H5File(seq_f)
    out_file = f"{out_folder}/{seq_f.name}".replace("RAW", "CORR")
    step_timer.start()

    img_dc = seq_dc[instrument_src, "data.image"]
    dshape = img_dc.shape
    corr_ntrains = dshape[0]  # number of available trains to correct.
all_train_ids = img_dc.train_ids # All trains including trains with no data.
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {seq_f.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(
f"{seq_f.name} has {len(all_train_ids) - corr_ntrains} "
"trains with missing data."
)
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
data_shape = (corr_ntrains, dshape[1], dshape[2])
print(f"Correcting file {seq_f} of {corr_ntrains} trains.")
# Overwrite seq_dc after eliminating empty trains or/and applying limited images.
seq_dc = seq_dc.select(
instrument_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
raw_data = seq_dc[instrument_src, "data.image"].ndarray().astype(np.float32)
to_store_arrays = {"image": raw_data}
# TODO: move the parts for reading data to plot to later cells.
if seq_n == 0:
raw_plt = raw_data.copy() # plot first sequence only
step_timer.start()
# Allocating shared arrays for data arrays for each correction stage.
data = context.alloc(shape=data_shape, dtype=np.float32)
bpix_data = context.alloc(shape=data_shape, dtype=np.uint32)
histCalRaw.fill(raw_data) # filling histogram with raw uncorrected data
# Applying offset correction
context.map(offset_correction, raw_data)
histCalOffsetCor.fill(data) # filling histogram with offset corrected data
if seq_n == 0:
off_data = data.copy() # plot first sequence only
to_store_arrays["pixels"] = data.copy()
to_store_arrays["mask"] = bpix_data
step_timer.done_step(f'offset correction.')
if corr_bools.get('common_mode'):
step_timer.start()
# Applying common mode correction
context.map(common_mode, data)
if seq_n == 0:
cm_data = data.copy() # plot first sequence only
corr_arrays["pixels_cm"] = data.copy()
to_store_arrays["pixels_cm"] = data.copy()
histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data
step_timer.done_step(f'common-mode correction.')
if corr_bools.get('relgain'):
step_timer.start()
# Applying gain correction
context.map(gain_correction, data)
if seq_n == 0:
rg_data = data.copy() # plot first sequence only
# TODO: Why storing a repeated constant for each image in corrected files.
corr_arrays["gain"] = np.repeat(relativegain[np.newaxis, ...], n_trains, axis=0).astype(np.float32) # noqa
to_store_arrays["gain"] = np.repeat(relativegain[np.newaxis, ...], corr_ntrains, axis=0).astype(np.float32) # noqa
histCalGainCor.fill(data) # filling histogram with gain corrected data
step_timer.done_step(f'gain correction.')
if corr_bools.get('pattern_class'):
step_timer.start()
ptrn_data = context.alloc(shape=data_shape, dtype=np.int32)
filtered_data = context.alloc(shape=data_shape, dtype=np.float32)  # holds NaNs for non-single patterns, so it must be float
# Applying pattern classification correction
# Even though the data is indeed of dtype np.float32,
# not specifying this again screws with the data quality.
context.map(pattern_classification_correction, data.astype(np.float32))
if seq_n == 0:
cls_data = data.copy() # plot first sequence only
# split event corrected images plotted for first sequence only
# (also these events are only singles events):
corr_arrays["pixels_classified"] = data.copy()
corr_arrays["patterns"] = ptrn_data
to_store_arrays["pixels_classified"] = data.copy()
to_store_arrays["patterns"] = ptrn_data
histCalPcorr.fill(data) # filling histogram with split events corrected data
# filling histogram with corr data after discarding doubles, triples, quadruple, clusters, and first singles
histCalPcorrS.fill(filtered_data)
step_timer.done_step(f'pattern classification correction.')
step_timer.start()
# Storing corrected data sources.
write_datasets(
seq_dc=seq_dc,
corr_arrays=to_store_arrays,
out_file=out_file,
instrument_src=instrument_src,
)
step_timer.done_step(f'Storing data.')
# Exit and raise warning if there are no data to correct for all sequences.
if empty_seq == len(seq_files):
warning("No valid trains for RAW data to correct.")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
print("In addition to offset correction, the following corrections were performed:")
for k, v in corr_bools.items():
if v:
print(" -", k.upper())
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# Histogramming the resulting spectra:
# The first _ refers to the bin edges and the second _ to statistics; we ignore both.
# Note: using histCalRaw.get(cumulatative=True) and so on turns the counts arrays such as
# RawHistVals into 1D arrays instead of keeping the original shape:
RawHistVals, _, RawHistMids, _ = histCalRaw.get()
off_cor_HistVals, _, off_cor_HistMids, _ = histCalOffsetCor.get()
if corr_bools.get('common_mode'):
cm_cor_HistVals, _, cm_HistMids, _ = histCalCommonModeCor.get()
if corr_bools.get('relgain'):
gain_cor_HistVals, _, gain_cor_HistMids, _ = histCalGainCor.get()
if corr_bools.get('pattern_class'):
split_HistVals, _, split_HistMids, _ = histCalPcorr.get() # split events corrected
singles_HistVals, _, singles_HistMids, _ = histCalPcorrS.get() # last s in variable names: singles events
```
%% Cell type:code id: tags:
``` python
# Saving intermediate data to disk:
step_timer.start()
np.savez(os.path.join(out_folder, 'Raw_Events.npz'), RawHistMids, RawHistVals)
np.savez(os.path.join(out_folder, 'Offset_Corrected_Events.npz'), off_cor_HistMids, off_cor_HistVals)
if corr_bools.get('common_mode'):
np.savez(os.path.join(out_folder, 'Common_Mode_Corrected_Events.npz'), cm_HistMids, cm_cor_HistVals)
if corr_bools.get('relgain'):
np.savez(os.path.join(out_folder, 'Gain_Corrected_Events.npz'), gain_cor_HistMids, gain_cor_HistVals)
if corr_bools.get('pattern_class'):
np.savez(os.path.join(out_folder, 'Split_Events_Corrected_Events.npz'), split_HistMids, split_HistVals)
np.savez(os.path.join(out_folder, 'Singles_Events.npz'), singles_HistMids, singles_HistVals)
step_timer.done_step(f'Saving intermediate data to disk.')
print("Various spectra are saved to disk in the form of histograms. Please check {}".format(out_folder))
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw vs. Corrected Spectra'))
step_timer.start()
figure = [{'x': RawHistMids,
'y': RawHistVals,
'y_err': np.sqrt(RawHistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Uncorrected'
},
{'x': off_cor_HistMids,
'y': off_cor_HistVals,
'y_err': np.sqrt(off_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset Corrected'
}]
if corr_bools.get('common_mode'):
figure.append({'x': cm_HistMids,
'y': cm_cor_HistVals,
'y_err': np.sqrt(cm_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Common Mode Corrected'})
if corr_bools.get('relgain'):
xrange = bin_range
figure.append({'x': gain_cor_HistMids,
'y': gain_cor_HistVals,
'y_err': np.sqrt(gain_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Gain Corrected'})
if corr_bools.get('pattern_class'):
figure.extend([{'x': split_HistMids,
'y': split_HistVals,
'y_err': np.sqrt(split_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Split Events Corrected'
},
{'x': singles_HistMids,
'y': singles_HistVals,
'y_err': np.sqrt(singles_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Singles Events'
}])
fig = xana.simplePlot(figure, aspect=1, x_label='ADU', y_label='Number of Occurrences', figsize='2col',
y_log=True, x_range=bin_range, title = '1 ADU per bin is used.',
legend='top-right-frame-1col')
step_timer.done_step('Plotting')
```
%% Cell type:code id: tags:
``` python
# This function plots pattern statistics:
def classification_plot(patternStats, hemisphere):
print("****************** {} HEMISPHERE ******************\n"
.format(hemisphere))
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(4, 4, 1)
sfields = ["singles", "first singles", "clusters"]
mfields = ["doubles", "triples", "quads"]
relativeOccurances = []
labels = []
for i, f in enumerate(sfields):
relativeOccurances.append(patternStats[f])
labels.append(f)
for i, f in enumerate(mfields):
for k in range(len(patternStats[f])):
relativeOccurances.append(patternStats[f][k])
labels.append("{}({})".format(f, k))
relativeOccurances = np.array(relativeOccurances, float)  # np.float was removed in recent NumPy versions
relativeOccurances /= np.sum(relativeOccurances)
pie = ax.pie(relativeOccurances, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
smaps = ["singlemap", "firstsinglemap", "clustermap"]
for i, m in enumerate(smaps):
ax = fig.add_subplot(4, 4, 2+i)
pmap = ax.imshow(patternStats[m], interpolation="nearest", vmax=2*np.nanmedian(patternStats[m]))
ax.set_title(m)
cb = fig.colorbar(pmap)
mmaps = ["doublemap", "triplemap", "quadmap"]
k = 0
for i, m in enumerate(mmaps):
for j in range(4):
ax = fig.add_subplot(4, 4, 2+len(smaps)+k)
pmap = ax.imshow(patternStats[m][j], interpolation="nearest", vmax=2*np.median(patternStats[m][j]))
ax.set_title("{}({})".format(m,j))
cb = fig.colorbar(pmap)
k+=1
```
%% Cell type:code id: tags:
``` python
# The next two cells plot the classification results for left and right hemispheres, respectively:
display(Markdown('### Classification Results - Plots'))
if corr_bools.get('pattern_class'):
patternStatsLH = patternClassifierLH.getPatternStats()
classification_plot(patternStatsLH, 'Left')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
patternStatsRH = patternClassifierRH.getPatternStats()
classification_plot(patternStatsRH, 'Right')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Tabulated Statistics'))
if corr_bools.get('pattern_class'):
step_timer.start()
t0 = PrettyTable()
t0.title = "Total Number of Counts after All Corrections"
t0.field_names = ["Hemisphere", "Singles", "First-Singles", "Clusters"]
t0.add_row(["LH", patternStatsLH['singles'], patternStatsLH['first singles'], patternStatsLH['clusters']])
t0.add_row(["RH", patternStatsRH['singles'], patternStatsRH['first singles'], patternStatsRH['clusters']])
print(t0)
print("Abbreviations: D (Doubles), T (Triples), Q (Quadruples), L (Left), R (Right), and H (Hemisphere).")
t1 = PrettyTable()
t1.field_names = ["Index", "D-LH", "D-RH", "T-LH", "T-RH", "Q-LH", "Q-RH"]
t1.add_row([0, patternStatsLH['doubles'][0], patternStatsRH['doubles'][0], patternStatsLH['triples'][0],
patternStatsRH['triples'][0], patternStatsLH['quads'][0], patternStatsRH['quads'][0]])
t1.add_row([1, patternStatsLH['doubles'][1], patternStatsRH['doubles'][1], patternStatsLH['triples'][1],
patternStatsRH['triples'][1], patternStatsLH['quads'][1], patternStatsRH['quads'][1]])
t1.add_row([2, patternStatsLH['doubles'][2], patternStatsRH['doubles'][2], patternStatsLH['triples'][2],
patternStatsRH['triples'][2], patternStatsLH['quads'][2], patternStatsRH['quads'][2]])
t1.add_row([3, patternStatsLH['doubles'][3], patternStatsRH['doubles'][3], patternStatsLH['triples'][3],
patternStatsRH['triples'][3], patternStatsLH['quads'][3], patternStatsRH['quads'][3]])
print(t1)
step_timer.done_step('Classification Results - Tabulated Statistics')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
    # Sum the four orientation-resolved counts per pattern type:
    doublesLH = sum(patternStatsLH['doubles'])
    triplesLH = sum(patternStatsLH['triples'])
    quadsLH = sum(patternStatsLH['quads'])
    allsinglesLH = patternStatsLH['singles'] + patternStatsLH['first singles']
    eventsLH = allsinglesLH + doublesLH + triplesLH + quadsLH

    doublesRH = sum(patternStatsRH['doubles'])
    triplesRH = sum(patternStatsRH['triples'])
    quadsRH = sum(patternStatsRH['quads'])
    allsinglesRH = patternStatsRH['singles'] + patternStatsRH['first singles']
    eventsRH = allsinglesRH + doublesRH + triplesRH + quadsRH
if eventsLH > 0.:
reloccurLH = np.array([allsinglesLH/eventsLH, doublesLH/eventsLH, triplesLH/eventsLH, quadsLH/eventsLH])
else:
reloccurLH = np.array([0]*4)
if eventsRH > 0.:
reloccurRH = np.array([allsinglesRH/eventsRH, doublesRH/eventsRH, triplesRH/eventsRH, quadsRH/eventsRH])
else:
reloccurRH = np.array([0]*4)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Pie Charts'))
if corr_bools.get('pattern_class'):
step_timer.start()
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot(1, 2, 1)
labels = ['Singles', 'Doubles', 'Triples', 'Quads']
pie = ax.pie(reloccurLH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in LH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
ax = fig.add_subplot(1, 2, 2)
pie = ax.pie(reloccurRH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in RH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
step_timer.done_step('Classification Results - Pie Charts')
```
%% Cell type:markdown id: tags:
### Various Images Averaged Over All Frames of Only the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
uncor_mean_im = np.nanmean(raw_data, axis=0)
offset_mean_im = np.nanmean(off_data, axis=0)
if corr_bools.get('common_mode'):
cm_mean_im = np.nanmean(cm_data, axis=0)
if corr_bools.get('relgain'):
gain_mean_im = np.nanmean(rg_data, axis=0)
if corr_bools.get('pattern_class'):
mean_im_cc = np.nanmean(cls_data, axis=0)
fig = xana.heatmapPlot(uncor_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image Averaged over Frames in the First Sequence')
fig = xana.heatmapPlot(offset_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(gain_mean_im, x_label='Columns', y_label='Rows',
lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(mean_im_cc, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0, vmax= 18000,
title = 'Image of Single Events Averaged over Frames in the First Sequence')
step_timer.done_step("Plotting")
```
%% Cell type:markdown id: tags:
### Images of the First Frame of the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(raw_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image (First Frame of the First Sequence)')
fig = xana.heatmapPlot(off_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(rg_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(cls_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Image of Single Events (First Frame of the First Sequence)')
step_timer.done_step("Plotting")
```
%% Cell type:code id: tags:
``` python
# Resetting the histogram calculators:
histCalRaw.reset()
histCalOffsetCor.reset()
if corr_bools.get('common_mode'):
histCalCommonModeCor.reset()
if corr_bools.get('relgain'):
histCalGainCor.reset()
if corr_bools.get('pattern_class'):
histCalPcorr.reset()
histCalPcorrS.reset()
```
%% Cell type:markdown id: tags:
Next, the corrected event patterns are read from the patterns dataset created previously and are separated into four categories (singles, doubles, triples and quadruples) using the pattern indices. However, this is done only for one sequence (corresponding to the seq_num variable) as an example.
Note that the number of bins and the bin range for the following histograms may differ from those presented above (depending on gain) to make the counts more noticeable and the peaks more defined.
If you are interested in plotting the events from all sequences or the spectra of half of the sensor, execute the spectra_pnCCD_NBC.ipynb notebook.
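As a minimal illustration of the masking idiom used in the next cell (toy arrays invented for this example, not detector data), pixels whose pattern index is not in the selected set are replaced with NaN so each event category can be histogrammed separately:
%% Cell type:code id: tags:
``` python
import numpy as np

# Toy stand-ins for the classified data and the pattern map:
toy_data = np.array([[10., 20.], [30., 40.]])
toy_patterns = np.array([[100, 200], [101, 300]])
# Keep singles (100) and first singles (101); blank out everything else:
toy_singles = toy_data.copy()
toy_singles[~np.isin(toy_patterns, [100, 101])] = np.nan
print(toy_singles)  # [[10. nan]
                    #  [30. nan]]
```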
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
singles = []
doubles = []
triples = []
quads = []
with H5File(f"{out_folder}/{seq_files[0].name.replace('RAW', 'CORR')}") as dc: # noqa
data = dc[instrument_src, "data.pixels_classified"].ndarray()
patterns = dc[instrument_src, "data.patterns"].ndarray()
    # Event pattern indices are as follows: 100 (singles), 101 (first singles),
    # 200-203 (doubles), 300-303 (triples), and 400-403 (quadruples). For the last
    # three pattern types, the four indices correspond to the left, right, up, and
    # down orientations of the split event.
# Separating the events:
# Singles and First Singles:
for s in range(100, 102):
single = data.copy()
single[patterns != s] = np.nan
singles.append(single)
for d in range(200, 204):
double = data.copy()
double[patterns != d] = np.nan
doubles.append(double)
for t in range(300, 304):
triple = data.copy()
triple[patterns != t] = np.nan
triples.append(triple)
for q in range(400, 404):
quad = data.copy()
quad[patterns != q] = np.nan
quads.append(quad)
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
step_timer.start()
hA = 0
h = 0
for single in singles:
        hs, e = np.histogram(single.flatten(), bins=event_bins, range=b_range)  # hs: histogram counts, e: bin edges
        h += hs
        hA += hs  # hA accumulates counts over all event types ('All Valid Events' below)
    # np.histogram returns one more bin edge than counts, so e[:-1] (all edges but
    # the last) is plotted against the h arrays to match sizes.
display(Markdown('### Histograms of Corrected Events for One Sequence Only'))
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111)
ax.step(e[:-1], h, color='blue', label='Events Involving Single Pixels Only')
ax.semilogy() # y-axis is log, x-axis is linear
ax.set_xlabel("Energy (ADU) [{} bins per {} ADU]".format(event_bins, b_range[1]-b_range[0]))
ax.set_ylabel("Corrected Events for One Sequence (counts)")
ax.set_xlim(b_range)
h = 0
for double in doubles:
hd, e = np.histogram(double.flatten(), bins=event_bins, range=b_range)
h += hd
hA += hd
ax.step(e[:-1], h, color='red', label='Events Splitting on Double Pixels')
h = 0
for triple in triples:
ht, e = np.histogram(triple.flatten(), bins=event_bins, range=b_range)
h += ht
hA += ht
ax.step(e[:-1], h, color='green', label='Events Splitting on Triple Pixels')
h = 0
for quad in quads:
hq, e = np.histogram(quad.flatten(), bins=event_bins, range=b_range)
h += hq
hA += hq
ax.step(e[:-1], h, color='purple', label='Events Splitting on Quadruple Pixels')
ax.step(e[:-1], hA, color='grey', label='All Valid Events')
l = ax.legend()
step_timer.done_step("Plotting")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
@@ -102,6 +102,8 @@ install_requires = [
     "traitlets==4.3.3",
     "xarray==2022.3.0",
     "EXtra-redu==0.0.7",
+    "rich==12.6.0",
+    "httpx==0.23.0",
 ]
if "readthedocs.org" not in sys.executable:
@@ -10,12 +10,12 @@ mdc:
   user-secret: '@note add this to secrets file'
 calcat:
-  base-api-url: http://exflcalproxy:8080/api
-  use-oauth2: false
-  token-url: https://in.xfel.eu/calibration/oauth/token
   auth-url: https://in.xfel.eu/calibration/oauth/authorize
+  base-api-url: https://in.xfel.eu/calibration/api
+  refresh-url: https://in.xfel.eu/calibration/oauth/token
+  scope: ''
+  token-url: https://in.xfel.eu/calibration/oauth/token
+  use-oauth2: true
   user-email: calibration@example.com
   user-id: '@note add this to secrets file'
   user-secret: '@note add this to secrets file'
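For context, these OAuth2 fields are consumed via a standard client-credentials token request, as implemented by `get_bearer_token` in `manual_launch.py` below. A minimal sketch, assuming the `httpx` dependency added to `install_requires` above; the `<user-id>`/`<user-secret>` placeholders stand in for values from the secrets file:

```python
import httpx

# Placeholder credentials; the real values live in the secrets file.
resp = httpx.post(
    "https://in.xfel.eu/calibration/oauth/token",
    data={
        "grant_type": "client_credentials",
        "client_id": "<user-id>",
        "client_secret": "<user-secret>",
    },
)
token = resp.json()["access_token"]

# The bearer token then authorizes requests against the new base-api-url:
api = httpx.Client(
    base_url="https://in.xfel.eu/calibration/api",
    headers={"Authorization": f"Bearer {token}"},
)
```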
@@ -189,3 +189,34 @@ status in myMdC should update as the processing occurs.
The command ``squeue -u xcaltst`` will show running & pending Slurm jobs started
by this test system.
Manually Submitting Jobs
------------------------
A script `manual_launch.py` is provided to manually submit jobs to the service.
```bash
usage: manual_launch.py [-h] --proposal PROPOSAL [--delay DELAY] [--noconfirm] [--really] slices [slices ...]
Manually submit calibration jobs.
positional arguments:
slices slices (or single numbers) of runs to process, inclusive range, starting at 1 (e.g. 1:3 parsed to {1, 2, 3}, 10 parsed to {10}, :10
parsed to {1, 2, ..., 10})
optional arguments:
-h, --help show this help message and exit
--proposal PROPOSAL proposal number
--delay DELAY delay in seconds between submissions
--noconfirm skip confirmation
--really actually submit jobs instead of just printing them
To run in the background use `nohup PYTHONUNBUFFERED=1 python manual_launch.py ... &` followed by `disown`.
```
Slices are inclusive, so `1:10` means runs 1 through 10, including both 1 and 10. A
'slice' can also be a single run number.
For example, `python3 ./manual_launch.py 1 10:12 160:-1 --delay 60
--proposal 2222 --really` submits runs 1, 10 to 12, and 160 onwards for calibration
of proposal 2222, with a 60-second delay between submissions.
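As a rough sketch of these semantics, consider the hypothetical helper below (not part of the script, which builds Python `slice` objects instead; here we assume the proposal's highest run number is 200):

```python
def expand_token(token: str, max_run_no: int = 200) -> set:
    """Expand a slice token like '1:3', '10', or ':10' into a set of run numbers."""
    parts = [int(x) if x else None for x in token.split(":")]
    start = parts[0] if parts[0] else 1           # run numbers are 1-based
    stop = parts[1] if len(parts) > 1 else start  # a single number means that run only
    if stop is None or stop == -1:                # open or '-1' end: up to the last run
        stop = max_run_no
    return set(range(start, stop + 1))            # ranges are inclusive

print(expand_token("1:3"))    # {1, 2, 3}
print(expand_token("10"))     # {10}
print(expand_token(":10"))    # {1, 2, ..., 10}
print(expand_token("160:-1")) # {160, ..., 200}
```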
from __future__ import annotations
import argparse
import datetime as dt
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, Optional
import zmq
from config import webservice as config
from httpx import Client, Response
from rich import print
from rich.progress import (
MofNCompleteColumn,
Progress,
SpinnerColumn,
TextColumn,
TimeElapsedColumn,
)
from rich.prompt import Confirm
parser = argparse.ArgumentParser(
description="Manually submit calibration jobs.",
epilog="""To run in the background use `nohup PYTHONUNBUFFERED=1 python
manual_launch.py ... &` followed by `disown`.""",
)
parser.add_argument(
"slices",
type=str,
nargs="+",
help="""slices (or single numbers) of runs to process, inclusive range, starting at
1 (e.g. 1:3 parsed to {1, 2, 3}, 10 parsed to {10}, :10 parsed to {1, 2, ...,
10})""",
)
parser.add_argument(
"--proposal",
type=int,
help="proposal number",
required=True,
)
parser.add_argument(
"--delay",
default=30,
type=int,
help="delay in seconds between submissions",
required=False,
)
parser.add_argument(
"--noconfirm",
action="store_true",
help="skip confirmation",
)
parser.add_argument(
"--really",
action="store_true",
help="actually submit jobs instead of just printing them",
)
# Module-level cache for the OAuth bearer token; a new token is requested once expired.
BEARER = {
    "access_token": "",
    "expires_at": dt.datetime.now(),
}
def pre_checks():
# Fail fast if we don't have the required configs set
required_keys = ["token-url", "user-id", "user-secret", "user-email"]
for k in required_keys:
if config["metadata-client"][k] is None:
print(
f"Missing key [bold red]`{k}`[/bold red] in metadata client configuration"
)
print("[bold red]Aborted[/bold red]")
exit(1)
def get_bearer_token() -> str:
if BEARER["access_token"] and BEARER["expires_at"] > dt.datetime.now():
return BEARER["access_token"]
with Client() as client:
response = client.post(
f"{config['metadata-client']['token-url']}",
data={
"grant_type": "client_credentials",
"client_id": config["metadata-client"]["user-id"],
"client_secret": config["metadata-client"]["user-secret"],
},
)
data = response.json()
if any(k not in data for k in ["access_token", "expires_in"]):
            print(
                "Response from MyMdC missing required fields, check webservice `user-id` "
                f"and `user-secret`. Response: {data=}",
            )
raise ValueError("Invalid response from MyMdC")
expires_in = dt.timedelta(seconds=data["expires_in"])
BEARER["access_token"] = data["access_token"]
BEARER["expires_at"] = dt.datetime.now() + expires_in
return BEARER["access_token"]
@contextmanager
def get_client() -> Generator[Client, None, None]:
bearer_token = get_bearer_token()
with Client() as client:
headers = {
"accept": "application/json; version=1",
"X-User-Email": config["metadata-client"]["user-email"],
"Authorization": f"Bearer {bearer_token}",
}
client.headers.update(headers)
yield client
def _get_runs_by_proposal(number: int, client: Client, page: int = 1) -> Response:
return client.get(
f"{config['metadata-client']['base-api-url']}/runs/runs_by_proposal",
params={"proposal_number": number, "page": page},
timeout=10,
)
def get_runs_by_proposal_all(number: int) -> list[dict]:
with get_client() as client:
res = _get_runs_by_proposal(number, client, 1)
if res.status_code != 200:
raise ValueError(res.url, res.text)
runs = res.json()
        # Paginate through the remaining pages, as reported by the x-total-pages header
        for page in range(2, int(res.headers.get("x-total-pages", 1)) + 1):
            res = _get_runs_by_proposal(number, client, page)
            runs.extend(res.json())
return runs
def main(
proposal_no: int,
slices: list[slice],
delay: int,
noconfirm: Optional[bool] = False,
really: Optional[bool] = False,
):
with Progress(transient=True) as progress:
task_submission = progress.add_task(
"[yellow]Querying FS for proposal information", total=None
)
exp = Path("/gpfs/exfel/exp")
proposal_paths = list(exp.glob(f"*/*/p{proposal_no:06d}"))
if len(proposal_paths) != 1:
raise ValueError(f"Proposal {proposal_no} not found")
proposal_path = proposal_paths[0]
instrument = proposal_path.parts[4]
cycle = proposal_path.parts[5]
progress.update(task_submission, description="[yellow]Querying MyMdC for runs")
all_runs = get_runs_by_proposal_all(proposal_no)
run_no_id_map = {run["run_number"]: run["id"] for run in all_runs}
max_run_no = max(run_no_id_map.keys())
requested_ranges = [range(*s.indices(max_run_no)) for s in slices]
requested_run_nos = {run_no for r in requested_ranges for run_no in r}
requests = dict(
sorted(
{
run_no: run_no_id_map[run_no]
for run_no in requested_run_nos
if run_no in run_no_id_map
}.items()
)
)
if missing_run_ids := set(requested_run_nos) - set(run_no_id_map.keys()):
print(
f"[bold red]Missing run IDs for run number(s) {missing_run_ids}[/bold red]"
)
if not really:
print("[yellow]`--really` flag missing, not submitting jobs")
    # Confirm.ask returns a bool; Prompt.ask would return the raw string, so any
    # non-empty answer (including "n") would count as confirmation.
    if not noconfirm and not Confirm.ask(
        f"Submit [red bold]{len(requests)}[/red bold] jobs for proposal "
        f"[bold]{proposal_no}[/bold]?",
        default=False,
    ):
print("[bold red]Aborted[/bold red]")
exit(1)
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
MofNCompleteColumn(),
TimeElapsedColumn(),
) as progress:
description = f"[green]Submitted request for p{proposal_no:05d}/{{run_str}} "
task_submission = progress.add_task(
f"{description}r---[------]", total=len(requests)
)
        context = zmq.Context()
        socket = context.socket(zmq.REQ)
        socket.connect("tcp://max-exfl016:5555")
if not really:
# Fake socket for testing, just logs what would have been sent via ZMQ
socket = lambda: None
socket.send = lambda x: progress.console.log(
f"mock `zmq.REQ` socket send: {x}"
)
socket.recv = lambda: "mock `zmq.REQ` socket response"
last_run_no = list(requests.keys())[-1]
for run_no, run_id in requests.items():
args = (
"correct",
str(run_id),
"_",
str(instrument),
str(cycle),
f"{proposal_no:06d}",
str(run_no),
"-",
)
msg = f"""['{"','".join(args)}']""".encode()
progress.console.log(args)
socket.send(msg)
progress.update(
task_submission,
advance=1,
description=description.format(
run_str=f"[bold yellow]r{run_no:03d}[{run_id:06d}]"
),
)
res = socket.recv()
progress.console.log(res)
if run_no != last_run_no:
progress.console.log(f"sleeping for {delay}s")
time.sleep(delay)
else:
progress.update(task_submission, description="[green]Done")
if __name__ == "__main__":
args = vars(parser.parse_args())
    slices = []
    for s in args["slices"]:
        slice_split = tuple(int(x) if x else None for x in s.split(":"))
        step = None
        if len(slice_split) == 1:
            start, stop = slice_split[0], slice_split[0]
        elif len(slice_split) == 2:
            start, stop = slice_split
        else:
            start, stop, step = slice_split
        # Python slice indices are 0-based, but run numbers are 1-based
        if start is None or start == 0:
            start = 1
        # Make the stop bound inclusive; -1 keeps its "up to the last run" meaning
        if stop:
            stop = stop + 1 if stop != -1 else stop
        slices.append(slice(start, stop, step))
pre_checks()
# Standalone snippet: manually send a `dark_request` to the webservice.
con = zmq.Context()
socket = con.socket(zmq.REQ)
socket.connect("tcp://max-exfl017:5555")
action = 'dark_request'
dark_run_id = '258'
sase = 'sase1'
instrument = 'CALLAB'
cycle = '202031'
proposal = '900113'
detector_id = 'SPB_DET_AGIPD1M-1'
pdu_physical_names = '["AGIPD00 (Q1M1)"', '"AGIPD01 (Q1M2)"', '"AGIPD02 (Q1M3)"', '"AGIPD03 (Q1M4)"', '"AGIPD04 (Q2M1)"', '"AGIPD05 (Q2M2)"', '"AGIPD06 (Q2M3)"', '"AGIPD07 (Q2M4)"', '"AGIPD08 (Q3M1)"', '"AGIPD09 (Q3M2)"', '"AGIPD10 (Q3M3)"', '"AGIPD11 (Q3M4)"', '"AGIPD12 (Q4M1)"', '"AGIPD13 (Q4M2)"', '"AGIPD14 (Q4M3)"', '"AGIPD15 (Q4M4)"]' # noqa
pdu_karabo_das = '["AGIPD00"', ' "AGIPD01"', ' "AGIPD02"', ' "AGIPD03"', ' "AGIPD04"', ' "AGIPD05"', ' "AGIPD06"', ' "AGIPD07"', ' "AGIPD08"', ' "AGIPD09"', ' "AGIPD10"', ' "AGIPD11"', ' "AGIPD12"', ' "AGIPD13"', ' "AGIPD14"', ' "AGIPD15"]' # noqa
operation_mode = 'FIXED_GAIN'
run_numbers = '[9985,]'
data = [action, dark_run_id, sase, instrument, cycle, proposal, detector_id,
operation_mode, *pdu_physical_names, *pdu_karabo_das, run_numbers]
# Send `data`, assembled from the variables above; the PDU-name and karabo-da
# fragments unpack into pieces of a stringified list, which is the format the
# webservice expects.
socket.send(str(data).encode())
resp = socket.recv_multipart()[0]
print(resp.decode())
main(
args["proposal"],
slices,
args["delay"],
args["noconfirm"],
args["really"],
)