%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202131/p900230/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/agipd_resolve_conf" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 275 # run to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo id of the detector
karabo_da = ['-1'] # a list of data aggregator names, Default ['-1'] selects all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the image index
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
mem_cells = 0 # Number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train; at most 3 list elements are allowed (see the sketch after this cell).
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity, no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels to consider a module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered dark
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask a complete ADC
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
# Correction Booleans
only_offset = False # Apply only the offset correction; if False, offset is still applied by default, if True only the offset correction is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask an entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable
energy_threshold = -1000 # The lower limit for the pulse energy (uJ) of frames subject to processing. If -1000, selection by pulse energy is disabled
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
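%% Cell type:markdown id: tags:
The `max_pulses` parameter above is a `[start, stop, step]` range over memory-cell indices within a train; the actual selection is handled by `CellRange`/`LitFrameSelection` further below. A minimal sketch of what the default value selects:
%% Cell type:code id: tags:
``` python
# Illustration only: the default [0, 352, 1] selects every cell index in range(0, 352).
example_cells = list(range(*max_pulses))
print(f"{len(example_cells)} cell indices selected, "
      f"from {example_cells[0]} to {example_cells[-1]}")
```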
%% Cell type:code id: tags:
``` python
import itertools
import os
import math
import multiprocessing
import re
import traceback
import warnings
from datetime import timedelta
from pathlib import Path
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import H5File, RunDirectory, stack_detector_data, by_id
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.step_timing import StepTimer
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
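%% Cell type:markdown id: tags:
For illustration, with the default parameters above these templates expand to `SPB_IRU_AGIPD1M1/MDL/FPGA_COMP` (control), `SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf` (instrument, with the module number filled in later) and `INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/` (index):
%% Cell type:code id: tags:
``` python
# Show the formatted source names used to address control, image and index data.
print(f"Control source:    {ctrl_src}")
print(f"Instrument source: {instrument_src}")
print(f"Index source:      {index_src}")
```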
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules:", ', '.join(cal_tools.tools.module_index_to_qm(x) for x in modules))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
if use_ppu_device:
# Obtain trains to process if using a pulse picker device.
# Will throw an uncaught exception if the device is wrong.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
train_ids = np.unique(seq_start)[1:] + ppu_train_offset
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# Process all trains.
train_ids = None
print(f'Processing all valid trains')
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = cal_tools.tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
mod_channel = int(re.findall(rf".*{first_mod_channel}CH([0-9]+):.*", instrument_src_mod)[0])
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
if acq_rate == 0.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == 0.:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: {photon_energy}")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
print(f"Warning: {to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_device:
# check run for the AgipdLitFrameFinder device
if use_litframe_device + ':output' in dc.instrument_sources:
# Use selection provided by the AgipdLitFrameFinder (if the device is recorded)
cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)
train_ids = cell_sel.train_ids
else:
# Use range selection (if the device is not recorded)
print(f"WARNING: LitFrameFinder {use_litframe_device} device is not found.")
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
```
%% Cell type:code id: tags:
``` python
module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = cal_tools.tools.CalibrationMetadata(out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
err = ""
k_da = module_index_to_karabo_da[mod]
try:
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
else:
# TODO: replace with proper retrieval (as done in pre-correction)
when = agipd_corr.initialize_from_db(
karabo_id=karabo_id,
karabo_da=k_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
memory_cells=mem_cells_db,
bias_voltage=bias_voltage,
photon_energy=photon_energy,
gain_setting=gain_setting,
acquisition_rate=acq_rate,
integration_time=integration_time,
module_idx=mod,
only_dark=False,
)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, k_da
print(f'Preparing constants (FF: {agipd_corr.corr_bools.get("xray_corr", False)}, PC: {any(agipd_corr.pc_bools)}, '
f'BLC: {any(agipd_corr.blc_bools)})')
ts = perf_counter()
with multiprocessing.Pool(processes=len(modules)) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
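%% Cell type:markdown id: tags:
A quick illustration of `batches` with hypothetical file names: ten files grouped by a batch size of 4 (the role played by `n_cores_files` below) yield batches of four, four and two.
%% Cell type:code id: tags:
``` python
# Illustration only, using dummy names rather than the mapped sequence files.
example_files = [f"f{i}" for i in range(10)]
for batch in batches(example_files, 4):
    print(batch)
```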
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
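%% Cell type:markdown id: tags:
As a sketch of the chunking: a single file slot holding 2500 images with `chunk_size = 1000` is split into three near-equal chunks.
%% Cell type:code id: tags:
``` python
# Illustration only: chunk boundaries for one hypothetical file slot of 2500 images.
for i_proc, start, stop in imagewise_chunks([2500]):
    print(i_proc, start, stop)  # -> (0, 0, 833), (0, 833, 1666), (0, 1666, 2500)
```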
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
with multiprocessing.Pool() as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
step_timer.done_step(f'Loading data from files')
if not np.any(img_counts):
# Skip any further processing and output if there are no images to
# correct in this file.
continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# If common mode correction is enabled:
# Cell selection is only activated after common mode correction.
# Perform cross-file correction parallel over asics
pool.starmap(agipd_corr.cm_correction, itertools.product(
range(len(file_batch)), range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise correction"
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# If the yml file contains "retrieved-constants", a precorrection notebook
# was run and the report is generated from it.
fst_print = True
timestamps = {}
for i, (error, modno, when, k_da) in enumerate(const_out):
qm = cal_tools.tools.module_index_to_qm(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
if k_da not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
module_timestamps = {}
# Print only if the correction did not crash
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store a few timestamps if they exist
# Add NA to keep the array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
module_timestamps[key] = when[key]
else:
if error is not None:
module_timestamps[key] = "Err"
else:
module_timestamps[key] = "NA"
timestamps[qm] = module_timestamps
seq = sequences[0] if sequences else 0
if timestamps:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
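%% Cell type:markdown id: tags:
Schematically, the dictionary dumped to the `retrieved_constants_s{seq}.yml` file above has the following structure (the module name and timestamps below are placeholders):
%% Cell type:code id: tags:
``` python
# Placeholder sketch of the structure passed to yaml.safe_dump above.
example_time_summary = {
    "time-summary": {
        "S0": {  # sequence
            "Q1M1": {  # module
                "Offset": "21-06-01 12:00",
                "SlopesPC": "NA",
                "SlopesFF": "NA",
            },
        },
    },
}
```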
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
"""Load a single train for all modules
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param modules: Number of detector modules to stack
:param fillvalue: Fill value to use for missing modules
"""
run_data = RunDirectory(data_folder, include)
if tid is not None:
tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)
else:
tid, data = next(iter(run_data.select(f'{detector_id}/DET/*', source).trains(require_all=True)))
# TODO: remove and use the keep_dims version after updating Extra-data.
# Avoid using default axis with sources of an expected scalar value per train.
if len(range(*cell_sel.crange)) == 1 and source in ['image.blShift', 'image.cellId', 'image.pulseId']:
axis = 0
else:
axis = -3
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules, axis=axis)
# Add cellId dimension when correcting one cellId only.
if (
len(range(*cell_sel.crange)) == 1 and
data_folder != run_folder # avoid adding pulse dims for raw data.
):
stacked_data = stacked_data[np.newaxis, ...]
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows the signal vs. digitized gain.
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
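%% Cell type:markdown id: tags:
A pixel's mask value is the bitwise OR of these entries, so a value can be decoded back into its constituent bad-pixel types. A minimal sketch:
%% Cell type:code id: tags:
``` python
# Decode one (combined) mask value from the map above into BadPixels flags.
example_value = int(np.max(mask[cell_idx_preview]))
flags = [item.name for item in BadPixels if example_value & item.value]
print(f"Mask value {example_value} decodes to: {flags}")
```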
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
%% Cell type:markdown id: tags:
# Summary of the AGIPD offline correction #
%% Cell type:code id: tags:
``` python
run = 11 # run to process, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_Corr" # path to output to, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo id of the detector
modules = [-1]
karabo_da = ['-1'] # a list of data aggregator names, Default ['-1'] selects all data aggregators
```
%% Cell type:code id: tags:
``` python
import re
import warnings
from pathlib import Path
import dateutil.parser
import numpy as np
import yaml
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
%matplotlib inline
import tabulate
from cal_tools.tools import CalibrationMetadata
from IPython.display import Latex, Markdown, display
```
%% Cell type:code id: tags:
``` python
out_folder = Path(out_folder)
metadata = CalibrationMetadata(out_folder)
const_dict = metadata.setdefault("retrieved-constants", {})
time_dict = const_dict.setdefault("time-summary", {})
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
# Evaluate detector instance for mapping
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
# This is needed only if the AGIPD correction notebook had no precorrection notebook retrieving constants.
# Gather all generated sequence yml files for a time summary of the retrieved constants under retrieved-constants in metadata.yml.
for fn in sorted(out_folder.glob("retrieved_constants_*.yml")):
with fn.open("r") as fd:
fdict = yaml.safe_load(fd)
# append different sequences' time summary to the main yaml
time_dict.update(fdict["time-summary"])
fn.unlink()
metadata.save()
```
%% Cell type:code id: tags:
``` python
def print_const_table(const):
print(f"{const} constants were injected on:")
table_data = {}
for seq, mod_data in time_dict.items():
for mod, const_data in mod_data.items():
timestamp = const_data[const]
table_data.setdefault(timestamp, []).append(f"{seq}:{mod}")
table = []
if not len(table_data):
table.append(["No constants retrieved"])
elif len(table_data) == 1:
table.append([[*table_data][0], "All modules"])
else:
for timestamp, seqmod in table_data.items():
table.append([timestamp, seqmod[0]])
for further_module in seqmod[1:]:
table.append(["", further_module])
display(Latex(tabulate.tabulate(table,
tablefmt="latex",
headers=["Timestamps", "Modules and sequences"])))
for const in ['Offset', 'SlopesPC', 'SlopesFF']:
print_const_table(const)
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# AGIPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # run to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo id of the detector
karabo_da = ['-1'] # a list of data aggregator names, Default ['-1'] selects all data aggregators
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
ctrl_source_template = '{}/MDL/FPGA_COMP_TEST' # path to control information
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
receiver_template = "{}CH0" # inset for receiver devices
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
calfile = "" # path to calibration file. Leave empty if all data should come from DB
nodb = False # if set, only file-based constants will be used
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
photon_energy = 9.2 # photon energy in keV
integration_time = -1 # integration time, negative values for auto-detection.
# Correction Booleans
only_offset = False # Apply only the offset correction; if False, offset is still applied by default, if True only the offset correction is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
```
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for the correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_hmatch"] = blc_hmatch
```
%% Cell type:code id: tags:
``` python
from pathlib import Path
from typing import List, Tuple
import matplotlib
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
from datetime import timedelta
from dateutil import parser
from extra_data import RunDirectory
matplotlib.use("agg")
from cal_tools import agipdlib, tools
from cal_tools.enums import AgipdGainMode
from iCalibrationDB import Conditions, Constants, Detectors
```
%% Cell type:code id: tags:
``` python
# slopes_ff_from_files left as str for now
in_folder = Path(in_folder)
out_folder = Path(out_folder)
metadata = tools.CalibrationMetadata(out_folder)
```
%% Cell type:code id: tags:
``` python
creation_time = None
if use_dir_creation_date:
creation_time = tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Using {creation_time} as creation time")
if sequences[0] == -1:
sequences = None
print(f"Outputting to {out_folder}")
out_folder.mkdir(parents=True, exist_ok=True)
melt_snow = False if corr_bools["only_offset"] else agipdlib.SnowResolution.NONE
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
print(f"Detector in use is {karabo_id}")
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
# Evaluate detector instance for mapping
if instrument == "SPB":
nmods = 16
elif instrument == "MID":
nmods = 16
elif instrument == "HED":
nmods = 8
print(f"Instrument {instrument}")
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}")
# set everything up filewise
mapped_files, _, _, _, _ = tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences=[0]
)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
instr_dc = run_dc.select(instrument_src.format("*"), require_all=True)
if not instr_dc.train_ids:
raise ValueError(f"No images found for {in_folder / f'r{run:04d}'}")
```
%% Cell type:code id: tags:
``` python
# Read AGIPD conditions from the 1st sequence of 1st module and slow data.
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
instrument_src_mod = instrument_src.format(0)
agipd_cond = agipdlib.AgipdCtrl(
run_dc=run_dc,
image_src=None,  # Not needed, as we won't read mem_cells or acq_rate.
ctrl_src=ctrl_src,
)
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:markdown id: tags:
## Retrieve Constants ##
%% Cell type:code id: tags:
``` python
def retrieve_constants(
k_da: str, idx: int
) -> Tuple[dict, str, float, float]:
"""
Retrieve constants for a module.
:return:
mdata_dict: (DICT) dictionary with the metadata for the retrieved constants.
k_da: karabo data aggregator.
acq_rate: acquisition rate parameter.
mem_cells: number of memory cells.
"""
# check if this module has images to process.
if instrument_src.format(idx) not in instr_dc.all_sources:
print("ERROR: No raw images found for "
f"{tools.module_index_to_qm(idx)}({k_da}).")
return None, k_da, None, None
agipd_cond.image_src = instrument_src.format(idx)
if mem_cells == 0:
# Read value from fast data.
local_mem_cells = agipd_cond.get_num_cells()
else:
# or use overriding notebook parameter.
local_mem_cells = mem_cells
# maybe we never found this in a sequence file...
if local_mem_cells is None:
raise ValueError(
"No raw images found for "
f"{tools.module_index_to_qm(module_index)}({karabo_da}) for all sequences")
if acq_rate == 0.:
local_acq_rate = agipd_cond.get_acq_rate()
else:
local_acq_rate = acq_rate
# avoid retrieving constant, if requested.
if nodb_with_dark:
return None, k_da, None, None
const_dict = agipdlib.assemble_constant_dict(
corr_bools,
pc_bools,
local_mem_cells,
bias_voltage,
gain_setting,
local_acq_rate,
photon_energy,
gain_mode=gain_mode,
beam_energy=None,
only_dark=only_dark,
integration_time=integration_time
)
# Retrieve multiple constants through an input dictionary
# to return a dict of useful metadata.
mdata_dict = dict()
mdata_dict["constants"] = dict()
mdata_dict["physical-detector-unit"] = None # initialization
for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items(): # noqa
if gain_mode and const_name in ("ThresholdsDark",):
continue
# saving metadata in a dict
const_mdata = dict()
mdata_dict["constants"][const_name] = const_mdata
if slopes_ff_from_files and const_name in ["SlopesFF", "BadPixelsFF"]:
const_mdata["file-path"] = (
f"{slopes_ff_from_files}/slopesff_bpmask_module_{tools.module_index_to_qm(module_index)}.h5") # noqa
f"{slopes_ff_from_files}/slopesff_bpmask_module_{tools.module_index_to_qm(idx)}.h5") # noqa
const_mdata["creation-time"] = "00:00:00"
continue
if gain_mode and const_name in (
"BadPixelsPC", "SlopesPC", "BadPixelsFF", "SlopesFF"
):
param_copy = cond_param.copy()
del param_copy["gain_mode"]
condition = getattr(Conditions, cond_type).AGIPD(**param_copy)
else:
condition = getattr(Conditions, cond_type).AGIPD(**cond_param)
_, mdata = tools.get_from_db(
karabo_id,
k_da,
getattr(Constants.AGIPD, const_name)(),
condition,
getattr(np, const_init_fun)(const_shape),
cal_db_interface,
creation_time,
meta_only=True,
verbosity=0,
)
mdata_const = mdata.calibration_constant_version
# check if the constant was successfully retrieved.
if mdata.comm_db_success:
const_mdata["file-path"] = (
f"{mdata_const.hdf5path}" f"{mdata_const.filename}"
)
const_mdata["creation-time"] = f"{mdata_const.begin_at}"
mdata_dict["physical-detector-unit"] = mdata_const.device_name
else:
const_mdata["file-path"] = const_dict[const_name][:2]
const_mdata["creation-time"] = None
return mdata_dict, k_da, local_acq_rate, local_mem_cells
```
%% Cell type:code id: tags:
``` python
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
pc_bools = [corr_bools.get("rel_gain"),
corr_bools.get("adjust_mg_baseline"),
corr_bools.get('blc_noise'),
corr_bools.get('blc_hmatch'),
corr_bools.get('blc_stripes'),
melt_snow]
inp = []
only_dark = False
nodb_with_dark = False
if not nodb:
only_dark = (calfile != "")
if calfile != "" and not corr_bools["only_offset"]:
nodb_with_dark = nodb
da_to_qm = dict()
for module_index, k_da in zip(modules, karabo_da):
da_to_qm[k_da] = tools.module_index_to_qm(module_index)
if k_da in retrieved_constants:
print(
f"Constant for {k_da} already in calibration_metadata.yml, won't query again.")
continue
inp.append((k_da, module_index))
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=nmods) as pool:
results = pool.starmap(retrieve_constants, inp)
```
%% Cell type:code id: tags:
``` python
acq_rate_mods = []
mem_cells_mods = []
for md_dict, k_da, acq_rate, mem_cells in results:
    if acq_rate is None and mem_cells is None:
        continue
    retrieved_constants[k_da] = md_dict
    mem_cells_mods.append(mem_cells)
    acq_rate_mods.append(acq_rate)
# Validate that mem_cells and acq_rate are the same for all modules.
# TODO: Should a warning be enough?
if len(set(mem_cells_mods)) != 1 or len(set(acq_rate_mods)) != 1:
print(
"WARNING: Number of memory cells or "
"acquisition rate are not identical for all modules.\n"
f"mem_cells: {mem_cells_mods}.\nacq_rate: {acq_rate_mods}.")
# check if it is requested not to retrieve any constants from the database
if nodb_with_dark:
print("No constants were retrieved as calibrated files will be used.")
else:
print("\nRetrieved constants for modules:",
', '.join([tools.module_index_to_qm(x) for x in modules]))
print(f"Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Gain setting: {gain_setting}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: {photon_energy}")
print("Constant metadata is saved under \"retrieved-constants\" in calibration_metadata.yml\n")
```
%% Cell type:code id: tags:
``` python
print("Using constants with creation times:")
timestamps = {}
for k_da, module_name in da_to_qm.items():
if k_da not in retrieved_constants.keys():
continue
module_timestamps = timestamps[module_name] = {}
module_constants = retrieved_constants[k_da]
print(f"{module_name}:")
for cname, mdata in module_constants["constants"].items():
if hasattr(mdata["creation-time"], 'strftime'):
mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
print(f'{cname:.<12s}', mdata["creation-time"])
for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
if cname in module_constants["constants"]:
module_timestamps[cname] = module_constants["constants"][cname]["creation-time"]
else:
module_timestamps[cname] = "NA"
time_summary = retrieved_constants.setdefault("time-summary", {})
time_summary["SAll"] = timestamps
metadata.save()
```
......
%% Cell type:markdown id: tags:
# AGIPD Characterize Dark Images #
Author: European XFEL Detector Group, Version: 2.0
The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and gain thresholds. All four types of constants are evaluated per pixel and per memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.
The evaluated calibration constants are stored locally and injected into the calibration database.
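%% Cell type:markdown id: tags:
Conceptually, for each gain stage the offset is the per-pixel, per-memory-cell mean of the dark images and the noise is the corresponding standard deviation; gain thresholds are then placed between the stages, and the sigma/hard thresholds below flag bad pixels. A minimal sketch of this idea, using a hypothetical `darks` array of shape `(trains, cells, 512, 128)` for one module and one gain stage:
``` python
import numpy as np

rng = np.random.default_rng(0)
# Hypothetical dark data: (n_trains, n_cells, 512, 128) for one module/gain stage.
darks = rng.normal(loc=5000, scale=10, size=(10, 2, 512, 128))
offset = np.mean(darks, axis=0)  # per-pixel, per-memory-cell offset
noise = np.std(darks, axis=0)    # per-pixel, per-memory-cell noise
```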
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
out_folder = "" # path to output to, required
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 9985 # run number in which high gain data was recorded, required
run_med = 9984 # run number in which medium gain data was recorded, required
run_low = 9983 # run number in which low gain data was recorded, required
operation_mode = "ADAPTIVE_GAIN" # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")
karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode, use -1 to use value stored in slow data.
integration_time = -1 # integration time, negative values for auto-detection.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
thresholds_offset_hard = [0, 0] # For setting the same offset threshold for the 3 gains. Left for backwards compatibility. Default [0, 0] to take the following parameters.
thresholds_offset_hard_hg = [3000, 7000] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_mg = [6000, 10000] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_lg = [6000, 10000] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_hg_fixed = [3500, 6500] # Same as thresholds_offset_hard_hg, but for fixed gain operation
thresholds_offset_hard_mg_fixed = [3500, 6500] # Same as thresholds_offset_hard_mg, but for fixed gain operation
thresholds_offset_hard_lg_fixed = [3500, 6500] # Same as thresholds_offset_hard_lg, but for fixed gain operation
thresholds_noise_sigma = 5. # noise sigma thresholds for noise deduced bad pixels
thresholds_noise_hard = [0, 0] # For setting the same noise threshold for the 3 gains. Left for backwards compatibility. Default [0, 0] to take the following parameters.
thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_gain_sigma = 5. # Gain separation sigma threshold
max_trains = 550 # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarily avoid memory issues.
min_trains = 1 # Minimum number of trains for processing dark. If the run folder has less than the minimum number of trains, processing is stopped.
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells
# This is used if modules is not specified:
def find_modules(in_folder, run_high, modules):
if (modules is not None) and modules != [-1]:
return modules
from pathlib import Path
import re
modules = set()
for file in Path(in_folder, f'r{run_high:04d}').iterdir():
m = re.search(r'-AGIPD(\d{2})-', file.name)
if m:
modules.add(int(m.group(1)))
return sorted(modules)
```
%% Cell type:code id: tags:
``` python
import itertools
import multiprocessing
import os
from collections import OrderedDict
from datetime import timedelta
from pathlib import Path
from typing import List, Tuple
import dateutil.parser
import matplotlib
import numpy as np
import pasha as psh
import psutil
import tabulate
import yaml
from IPython.display import Latex, Markdown, display
from extra_data import RunDirectory
matplotlib.use('agg')
import iCalibrationDB
import matplotlib.pyplot as plt
from cal_tools.agipdlib import AgipdCtrl
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# insert control device if format string (does nothing otherwise)
ctrl_src = ctrl_source_template.format(karabo_id_control)
runs_dict = OrderedDict()
run_numbers = [run_high, run_med, run_low]
for gain_idx, (run_name, run_number) in enumerate(zip(["high", "med", "low"], run_numbers)):
runs_dict[run_name] = {
"number": run_number,
"gain": gain_idx,
"dc": RunDirectory(f'{in_folder}/r{run_number:04d}/')
}
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
# Read report path and create file location tuple to add with the injection
file_loc = f"proposal:{prop} runs:{run_low} {run_med} {run_high}"
report = get_report(out_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
def create_karabo_da_list(modules):
return(["AGIPD{:02d}".format(i) for i in modules])
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = create_karabo_da_list(modules)
else:
modules = [int(x[-2:]) for x in karabo_da]
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Create out_folder if it doesn't exist.
Path(out_folder).mkdir(parents=True, exist_ok=True)
max_trains_list = []
file_sizes = []
for run_dict in runs_dict.values():
missing_modules = [] # modules with no images within a run.
n_trains_list = [] # list of the number of trains for each module within a run.
# This is important in case no slurm parallelization over modules is done
# (e.g. when running the notebook interactively).
dc = run_dict["dc"].select(
[(instrument_src.format(m), "*") for m in modules],
require_all=True
)
# validate that there are trains and that data sources are
# present for any of the selected modules.
n_trains = len(dc.train_ids)
for m in modules:
# validate that there are trains for the selected modules and run.
dc = run_dict["dc"].select(
instrument_src.format(m), "*", require_all=True)
n_trains = len(dc.train_ids)
if n_trains == 0:
print(f"WARNING: No images for module AGIPD{m:02d}, run {run_dict['number']}.")
missing_modules.append(m)
# Warn if the module has fewer trains than expected.
elif n_trains < min_trains:
print(f"WARNING: AGIPD{m:02d}, run {run_dict['number']} "
f"has fewer trains than the minimum of {min_trains}.")
else:
print(f"Processing {max_trains if max_trains < n_trains else n_trains} "
f"trains for AGIPD{m:02d}, run {run_dict['number']}.")
n_trains_list.append(n_trains)
file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]
# After the module loop: keep the largest train count seen in this run.
max_trains_list.append(max(n_trains_list))
if max(n_trains_list) == 0:
raise ValueError(f"No images to process for run: {run_dict['number']}")
elif max(n_trains_list) < min_trains:
raise ValueError(f"{run_dict['number']} has less than minimum trains: {min_trains}")
# Update modules and karabo_da lists based on the available modules to process.
modules = [m for m in modules if m not in missing_modules]
karabo_da = create_karabo_da_list(modules)
print(f"Will process data in a total of {len(file_sizes)} files ({sum(file_sizes):.02f} GB).")
```
%% Cell type:markdown id: tags:
## Read and validate the control data of the runs.
%% Cell type:code id: tags:
``` python
def read_run_conditions(run_dict: dict):
agipd_cond = AgipdCtrl(
run_dc=run_dict["dc"],
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
)
cond_dict["runs"].append(run_dict["number"])
if acq_rate == 0:
cond_dict["acq_rate"].append(agipd_cond.get_acq_rate())
if mem_cells == 0:
cond_dict["mem_cells"].append(agipd_cond.get_num_cells())
if gain_setting == -1:
cond_dict["gain_setting"].append(
agipd_cond.get_gain_setting(creation_time))
if bias_voltage == 0.:
cond_dict["bias_voltage"].append(
agipd_cond.get_bias_voltage(karabo_id_control))
if integration_time == -1:
cond_dict["integration_time"].append(
agipd_cond.get_integration_time())
if gain_mode == -1:
cond_dict["gain_mode"].append(agipd_cond.get_gain_mode())
else:
cond_dict["gain_mode"].append(AgipdGainMode(gain_mode))
```
%% Cell type:code id: tags:
``` python
def validate_gain_modes(gain_modes: List[AgipdGainMode]):
# Validate that gain modes are not a mix of adaptive and fixed gain.
if all(
gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
):
fixed_gain_mode = False
# Some runs are adaptive by mistake.
elif any(
gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
):
raise ValueError(
f"ERROR: Given runs {self.read_conditions['run_number']}"
f"ERROR: Given runs {run_numbers}"
" have a mix of ADAPTIVE and FIXED gain modes: "
f"{self.read_conditions['gain_mode']}."
f"{gain_modes}."
)
elif list(gain_modes) == [
AgipdGainMode.FIXED_HIGH_GAIN,
AgipdGainMode.FIXED_MEDIUM_GAIN,
AgipdGainMode.FIXED_LOW_GAIN
]:
fixed_gain_mode = True
else:
raise ValueError(
"ERROR: Wrong arrangment of given dark runs. "
f"Given runs' gain_modes are {gain_modes} for runs: {run_numbers}."
)
return fixed_gain_mode
```
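%% Cell type:markdown id: tags:
A quick sanity check of the validation logic above (a usage sketch, not part of the processing pipeline):
%% Cell type:code id: tags:
``` python
# Usage sketch for validate_gain_modes, not part of the processing pipeline.
assert validate_gain_modes([AgipdGainMode.ADAPTIVE_GAIN] * 3) is False
assert validate_gain_modes([
    AgipdGainMode.FIXED_HIGH_GAIN,
    AgipdGainMode.FIXED_MEDIUM_GAIN,
    AgipdGainMode.FIXED_LOW_GAIN,
]) is True
# A mix of adaptive and fixed modes, or fixed modes in the
# wrong order, raises a ValueError.
```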
%% Cell type:code id: tags:
``` python
# Read slow data from 1st channel only.
# Read all modules in one notebook and validate the conditions across detectors?
# Currently slurm jobs run per one module.
# TODO: what if the first module is not available? Maybe only channel 2 is available.
instrument_src_mod = instrument_src.format(modules[0])
cond_dict = dict()
fixed_gain_mode = None
with multiprocessing.Manager() as manager:
cond_dict["runs"] = manager.list()
cond_dict["acq_rate"] = manager.list()
cond_dict["mem_cells"] = manager.list()
cond_dict["gain_setting"] = manager.list()
cond_dict["gain_mode"] = manager.list()
cond_dict["bias_voltage"] = manager.list()
cond_dict["integration_time"] = manager.list()
with multiprocessing.Pool(processes=len(modules)) as pool:
pool.starmap(read_run_conditions, zip(runs_dict.values()))
for cond, vlist in cond_dict.items():
if cond == "runs":
continue
elif cond == "gain_mode":
fixed_gain_mode = validate_gain_modes(cond_dict["gain_mode"])
elif not all(x == vlist[0] for x in vlist):
# TODO: raise ERROR??
print(
f"WARNING: {cond} is not the same for the runs "
f"{cond_dict['runs']} with values"
f" of {cond_dict[cond]}, respectively."
)
if cond_dict["acq_rate"]: acq_rate = cond_dict["acq_rate"][0]
if cond_dict["mem_cells"]: mem_cells = cond_dict["mem_cells"][0]
if cond_dict["gain_setting"]: gain_setting = cond_dict["gain_setting"][0]
if cond_dict["gain_mode"]: gain_mode = list(cond_dict["gain_mode"])
if cond_dict["bias_voltage"]: bias_voltage = cond_dict["bias_voltage"][0]
if cond_dict["integration_time"]: integration_time = cond_dict["integration_time"][0]
```
%% Cell type:code id: tags:
``` python
# Determine the gain operation mode based on the gain_mode stored in control h5file.
if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")
if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
print(
"WARNING: operation_mode parameter is ADAPTIVE_GAIN, "
"slow data indicates FIXED_GAIN.")
"WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but"
"slow data indicates FIXED_GAIN. Processing fixed gain constants.")
elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
print(
"WARNING: operation_mode parameter is FIXED_GAIN, "
"slow data indicates ADAPTIVE_GAIN")
elif not all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_mode):
raise ValueError(
"ERROR: Wrong arrangment of given dark runs. "
f"Given runs' gain_modes are {gain_mode} for runs: {runs}."
)
"WARNING: Operation_mode parameter is FIXED_GAIN, "
"slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
```
%% Cell type:code id: tags:
``` python
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Acquisition rate: {acq_rate}")
print(f"Memory cells: {mem_cells}")
print(f"Runs: {run_numbers}")
print(f"Interlaced mode: {interlaced}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
print(f"Gain setting: {gain_setting}")
print(f"Integration time: {integration_time}")
print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
```
%% Cell type:code id: tags:
``` python
if thresholds_offset_hard != [0, 0]:
# if set, this will override the individual parameters
thresholds_offset_hard = [thresholds_offset_hard] * 3
elif fixed_gain_mode:
thresholds_offset_hard = [
thresholds_offset_hard_hg_fixed,
thresholds_offset_hard_mg_fixed,
thresholds_offset_hard_lg_fixed,
]
else:
thresholds_offset_hard = [
thresholds_offset_hard_hg,
thresholds_offset_hard_mg,
thresholds_offset_hard_lg,
]
print("Will use the following hard offset thresholds")
for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
print(f"- {name} gain: {value}")
if thresholds_noise_hard != [0, 0]:
thresholds_noise_hard = [thresholds_noise_hard] * 3
else:
thresholds_noise_hard = [
thresholds_noise_hard_hg,
thresholds_noise_hard_mg,
thresholds_noise_hard_lg,
]
```
%% Cell type:code id: tags:
``` python
# Check if max_trains can be processed.
# more relevant if running on multiple modules (i.e. within notebook)
# mem_cells * gains * n_constants * modules * agipd_[x,y]image_size * 2
av_mem = psutil.virtual_memory().available
possible_trains = av_mem // (352 * 3 * 3 * len(modules) * 131072 * 2)
if max_trains == 0:
max_trains = max(max_trains_list)
if max_trains > possible_trains:
max_trains = possible_trains
print(
f"WARNING: available memory for processing is { av_mem / 1e9:.02f} GB."
f" Modifing max_trains to process to {max_trains}")
for run_dict in runs_dict.values():
run_dict["dc"] = run_dict["dc"].select_trains(np.s_[:max_trains])
```
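%% Cell type:markdown id: tags:
As a rough worked example of the memory estimate above (a sketch, reading the factors as cells × gains × constants × modules × image size × bytes per value): for a single module, one train costs about 352 × 3 × 3 × 131072 × 2 bytes ≈ 0.83 GB, so a node with ~500 GB of free memory holds roughly 600 trains. This is also where the default `max_trains = 550` for ~500 GB nodes comes from.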
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per pixel and per memory cell. Offsets are simply the median value of a set of dark data taken at a given gain; noise is the standard deviation; and the gain-bit values are the medians of the gain array.
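For a pixel $(x, y)$ and memory cell $c$, with dark images $D_t(x, y, c)$ over trains $t$ at a given gain stage, this amounts to

$$\mathrm{Offset}(x,y,c) = \operatorname{median}_t D_t(x,y,c), \qquad \mathrm{Noise}(x,y,c) = \operatorname{std}_t D_t(x,y,c),$$

and analogously $\operatorname{median}_t G_t(x,y,c)$ for the gain-bit array $G$.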
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")
def characterize_module(
channel: int, runs_dict: dict,
) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:
# Select the corresponding module channel.
instrument_src_mod = instrument_src.format(channel)
run_dc = runs_dict["dc"]
run_dc = runs_dict["dc"].select(instrument_src_mod, require_all=True)
if max_trains != 0:
run_dc = run_dc.select_trains(np.s_[:max_trains])
gain_index = runs_dict["gain"]
if run_dc[instrument_src_mod, "image.data"].shape[0] < min_trains:
print(
f"WARNING: {run_dc.files} have fewer than "
f"the minimum number of trains: {min_trains}.")
# Read module's image and cellId data.
im = run_dc[instrument_src_mod, "image.data"].ndarray()
cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())
local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
local_thresholds_noise_hard = thresholds_noise_hard[gain_index]
if interlaced:
if not fixed_gain_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
cell_ids = cell_ids[::2]
else:
if not fixed_gain_mode:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.transpose(im)
if not fixed_gain_mode:
ga = np.transpose(ga)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
noise = context.alloc(like=offset)
if fixed_gain_mode:
gains = None
gains_std = None
else:
gains = context.alloc(like=offset)
gains_std = context.alloc(like=offset)
def process_cell(worker_id, array_index, cell_number):
cell_slice_index = (cell_ids == cell_number)
im_slice = im[..., cell_slice_index]
offset[..., cell_number] = np.median(im_slice, axis=2)
noise[..., cell_number] = np.std(im_slice, axis=2)
if not fixed_gain_mode:
ga_slice = ga[..., cell_slice_index]
gains[..., cell_number] = np.median(ga_slice, axis=2)
gains_std[..., cell_number] = np.std(ga_slice, axis=2)
context.map(process_cell, np.unique(cell_ids))
# bad pixels
bp = np.zeros_like(offset, dtype=np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[(offset < local_thresholds_offset_hard[0]) |
(offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
return channel, gain_index, offset, noise, gains, gains_std, bp
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(
characterize_module, itertools.product(modules, list(runs_dict.values())))
# Example of the mapped values when processing 2 modules:
# [
# 0, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
# 0, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
# 0, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
# 1, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
# 1, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
# 1, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
# ]
```
%% Cell type:code id: tags:
``` python
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
if not fixed_gain_mode:
gain_g = OrderedDict()
gainstd_g = OrderedDict()
for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
qm = module_index_to_qm(module_index)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
if not fixed_gain_mode:
gain_g[qm] = np.zeros_like(offset_g[qm])
gainstd_g[qm] = np.zeros_like(offset_g[qm])
offset_g[qm][..., gain_index] = offset
noise_g[qm][..., gain_index] = noise
badpix_g[qm][..., gain_index] = bp
if not fixed_gain_mode:
gain_g[qm][..., gain_index] = gains
gainstd_g[qm][..., gain_index] = gains_std
```
%% Cell type:code id: tags:
``` python
# Add bad pixels due to bad gain separation
if not fixed_gain_mode:
for qm in gain_g.keys():
for g in range(2):
# Flag pixels with bad gain-stage separation: separation between
# neighbouring gain levels below "thresholds_gain_sigma", in units
# of the combined standard deviation.
bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
BadPixels.GAIN_THRESHOLDING_ERROR
```
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
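In formula form, with per-pixel, per-cell gain-bit medians $\mu_\mathrm{HG}$, $\mu_\mathrm{MG}$, $\mu_\mathrm{LG}$:

$$T_{\mathrm{HG}\to\mathrm{MG}} = \tfrac{1}{2}(\mu_\mathrm{HG} + \mu_\mathrm{MG}), \qquad T_{\mathrm{MG}\to\mathrm{LG}} = \tfrac{1}{2}(\mu_\mathrm{MG} + \mu_\mathrm{LG}),$$

stored as the first two components of `ThresholdsDark` below; the remaining three components are the gain-bit medians themselves.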
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
thresholds_g = {}
for qm in gain_g.keys():
thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
for i in range(3):
thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[qm] = {
'Offset': offset_g[qm],
'Noise': noise_g[qm],
'BadPixelsDark': badpix_g[qm]
}
if not fixed_gain_mode:
res[qm]['ThresholdsDark'] = thresholds_g[qm]
```
%% Cell type:code id: tags:
``` python
# set the operating condition
# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
condition = iCalibrationDB.Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
gain_mode=fixed_gain_mode,
integration_time=integration_time
)
```
%% Cell type:code id: tags:
``` python
# Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
qm_dict = OrderedDict()
all_pdus = get_pdu_from_db(
karabo_id,
karabo_da,
constant=iCalibrationDB.CalibrationConstant(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time.isoformat() if creation_time else None,
timeout=cal_db_timeout
)
for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
qm = module_index_to_qm(module_index)
qm_dict[qm] = {
"karabo_da": module_da,
"db_module": module_pdu
}
```
%% Cell type:markdown id: tags:
## Sending calibration constants to the database.
%% Cell type:code id: tags:
``` python
md = None
for qm in res:
db_module = qm_dict[qm]["db_module"]
for const in res[qm]:
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
dconst.data = res[qm][const]
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
report, cal_db_interface, creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Retrieving previous calibration constants for comparison.
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
qm_x_const = [(qm, const) for qm in res for const in res[qm]]
def retrieve_old_constant(qm, const):
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=qm_dict[qm]["karabo_da"],
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, qm_x_const
)
old_retrieval_pool.close()
```
%% Cell type:code id: tags:
``` python
mnames=[]
for i in modules:
qm = module_index_to_qm(i)
mnames.append(qm)
display(Markdown(f'## Position of the module {qm} and its ASICs'))
show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews make it possible to identify potential effects on all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:code id: tags:
``` python
if high_res_badpix_3d:
cols = {
BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
}
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This excludes isolated single bad pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
gnames = ['High Gain', 'Medium Gain', 'Low Gain']
for gain in range(3):
display(Markdown(f'### {gnames[gain]} ###'))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behaviour.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
# Plot only three gain threshold maps.
bp_thresh = OrderedDict()
for mod, con in badpix_g.items():
bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
bp_thresh[mod][...,:2] = con[...,:2]
bp_thresh[mod][...,2:] = con
create_constant_overview(thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
badpixels=[bp_thresh, np.nan],
gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'Low gain'],
marker=['d','d','','','']
)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
bad_pixel_aggregate_g[m] = d.astype(bool).astype(np.float64)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
old_const.setdefault(qm, {})[const] = data
old_mdata.setdefault(qm, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("The following pre-existing constants are used for comparison:"))
for qm, consts in old_mdata.items():
display(Markdown(f"- {qm}"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_dict[qm]["db_module"],
"old-constants": old_mdata[qm]
},
fd,
)
```
%% Cell type:code id: tags:
``` python
table = []
gain_names = ['High', 'Medium', 'Low']
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
for qm in badpix_g.keys():
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_g[qm][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))
if old_const[qm]['BadPixelsDark'] is not None:
dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma} / {thresholds_noise_hard[gain]}',
f'{thresholds_offset_sigma} / {thresholds_offset_hard[gain]}',
'', f'{thresholds_gain_sigma}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]
if old_const[qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels
One pixel can be bad for different reasons; therefore, the sum over all types of bad pixels can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
if fixed_gain_mode:
constants = ['Offset', 'Noise']
else:
constants = ['Offset', 'Noise', 'ThresholdsDark']
constants_x_qms = list(itertools.product(constants, res.keys()))
def compute_table(const, qm):
if const == 'ThresholdsDark':
table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
else:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
compare_with_old_constant = old_const[qm][const] is not None and \
old_const[qm]['BadPixelsDark'] is not None
data = np.copy(res[qm][const])
if const == 'ThresholdsDark':
data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data[res[qm]['BadPixelsDark']>0] = np.nan
if compare_with_old_constant:
data_old = np.copy(old_const[qm][const])
if const == 'ThresholdsDark':
data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data_old[old_const[qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
def compute_row(i):
line = [n_list[i]]
for gain in range(3):
# ThresholdsDark has only two threshold maps (HG-MG, MG-LG) to compare.
if gain == 2 and const == 'ThresholdsDark':
continue
stat_measure = f_list[i](data[...,gain])
line.append(f"{stat_measure:6.1f}")
if compare_with_old_constant:
old_stat_measure = f_list[i](data_old[...,gain])
line.append(f"{old_stat_measure:6.1f}")
else:
line.append("-")
return line
with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
rows = pool.map(compute_row, range(len(f_list)))
table.extend(rows)
return table
with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
tables = pool.starmap(compute_table, constants_x_qms)
for (const, qm), table in zip(constants_x_qms, tables):
display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# Gain Characterization #
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to read histograms from, required
out_folder = "" # the folder to output to, required
hist_file_template = "hists_m{:02d}_sum.h5" # the template to use to access histograms
modules = [10] # modules to correct, set to -1 for all, range allowed
raw_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # Path to raw image data used to create histograms
proc_folder = "" # Path to corrected image data used to create histograms
run = 449 # run number of the image data used to create histograms
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
local_output = True # output constants locally
db_output = False # output constants to database
# Fit parameters
peak_range = [-30, 30, 35, 70, 95, 135, 145, 220] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
peak_norm_range = [0.0, -1, 0, -1, 0, -1, 0, -1] #
# Bad-pixel thresholds (gain evaluation error). Contribute to BadPixel bit "Gain_Evaluation_Error"
peak_lim = [-30, 30] # Limit of position of noise peak
d0_lim = [10, 80] # hard limits for distance between noise and first peak
peak_width_lim = [0.9, 1.55, 0.95, 1.65] # hard limits on the peak widths for first and second peak, in units of the noise peak. 4 parameters.
chi2_lim = [0, 3.0] # Hard limit on chi2/nDOF value
intensity_lim = 15 # Threshold on standard deviation of a histogram in ADU. Contribute to BadPixel bit "No_Entry"
gain_lim = [0.8, 1.2] # Threshold on gain in relative number. Contribute to BadPixel bit "Gain_deviation"
cell_range = [1, 3] # range of cell to be considered, [0,0] for all
pixel_range = [0, 0, 32, 32] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
max_bins = 0 # Maximum number of bins to consider, 0 for all bins
batch_size = [1, 8, 8] # batch size: [cell,x,y]
fit_range = [0, 0] # range of a histogram considered for fitting in ADU. Dynamically evaluated in case [0,0]
n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
fix_peaks = False # Fix distance between photon peaks
do_minos = False # This is additional feature of minuit to evaluate errors.
sigma_limit = 0. # If >0, repeat fit keeping only bins within mu +- sigma_limit*sigma
# Detector conditions
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 8.05 # photon energy in keV
integration_time = -1 # integration time, negative values for auto-detection.
```
%% Cell type:code id: tags:
``` python
import glob
import os
import traceback
import warnings
from multiprocessing import Pool
import h5py
import matplotlib.pyplot as plt
import numpy as np
import sharedmem
import XFELDetAna.xfelpyanatools as xana
from cal_tools.agipdlib import get_bias_voltage
from cal_tools.agipdutils_ff import (
BadPixelsFF,
any_in,
fit_n_peaks,
gaussian,
gaussian_sum,
get_mask,
get_starting_parameters,
set_par_limits,
)
from cal_tools.ana_tools import get_range, save_dict_to_hdf5
from iminuit import Minuit
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
# %load_ext autotime
%matplotlib inline
warnings.filterwarnings('ignore')
```
%% Cell type:code id: tags:
``` python
peak_range = np.reshape(peak_range,(4,2))
peak_width_range = np.reshape(peak_width_range,(4,2))
peak_width_lim = np.reshape(peak_width_lim,(2,2))
peak_norm_range = [None if x == -1 else x for x in peak_norm_range]
peak_norm_range = np.reshape(peak_norm_range,(4,2))
module = modules[0]
```
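%% Cell type:markdown id: tags:
The flat `[a0, b0, a1, b1, ...]` parameter lists above are reshaped into one `(low, high)` pair per peak. A small sketch of the convention, using the `peak_range` default:
%% Cell type:code id: tags:
``` python
import numpy as np

flat = [-30, 30, 35, 70, 95, 135, 145, 220]  # [a0, b0, a1, b1, a2, b2, a3, b3]
pairs = np.reshape(flat, (4, 2))
# pairs[i] is the (low, high) search window for peak i,
# e.g. pairs[1] brackets the first photon peak.
assert pairs[1].tolist() == [35, 70]
```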
%% Cell type:code id: tags:
``` python
# This is never used in this notebook and should be removed
# if bias_voltage == 0:
# # Read the bias voltage from files, if recorded.
# # If not available, make use of the historical voltage the detector is running at
# control_filename = f'{raw_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
# bias_voltage = get_bias_voltage(control_filename, karabo_id_control)
# bias_voltage = bias_voltage if bias_voltage is not None else 300
# print(f"Bias voltage: {bias_voltage}V")
```
%% Cell type:code id: tags:
``` python
def idx_gen(batch_start, batch_size):
"""
This generator iterates across pixels and memory cells, starting
from batch_start until batch_start + batch_size
"""
for c_idx in range(batch_start[0], batch_start[0]+batch_size[0]):
for x_idx in range(batch_start[1], batch_start[1]+batch_size[1]):
for y_idx in range(batch_start[2], batch_start[2]+batch_size[2]):
yield(c_idx, x_idx, y_idx)
```
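%% Cell type:markdown id: tags:
A usage sketch: for a batch starting at `[0, 0, 0]` with a hypothetical (smaller than default) `batch_size` of `[1, 2, 2]`, the generator yields the (cell, x, y) index triples of that block in row-major order:
%% Cell type:code id: tags:
``` python
assert list(idx_gen([0, 0, 0], [1, 2, 2])) == [
    (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]
```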
%% Cell type:code id: tags:
``` python
n_pixels_x = pixel_range[2]-pixel_range[0]
n_pixels_y = pixel_range[3]-pixel_range[1]
hist_data = {}
with h5py.File(f"{in_folder}/{hist_file_template.format(module)}", 'r') as hf:
hist_data['cellId'] = np.array(hf['cellId'][()])
hist_data['hRange'] = np.array(hf['hRange'][()])
hist_data['nBins'] = np.array(hf['nBins'][()])
if cell_range == [0,0]:
cell_range[1] = hist_data['cellId'].shape[0]
if max_bins == 0:
max_bins = hist_data['nBins']
hist_data['cellId'] = hist_data['cellId'][cell_range[0]:cell_range[1]]
hist_data['hist'] = np.array(hf['hist'][cell_range[0]:cell_range[1], :max_bins, :])
n_cells = cell_range[1]-cell_range[0]
hist_data['hist'] = hist_data['hist'].reshape(n_cells, max_bins, 512, 128)
hist_data['hist'] = hist_data['hist'][:,:,pixel_range[0]:pixel_range[2],pixel_range[1]:pixel_range[3]]
print(f'Data shape {hist_data["hist"].shape}')
bin_edges = np.linspace(hist_data['hRange'][0], hist_data['hRange'][1], int(hist_data['nBins']+1))
x = (bin_edges[1:] + bin_edges[:-1])[:max_bins] * 0.5
batches = []
for c_idx in range(0, n_cells, batch_size[0]):
for x_idx in range(0, n_pixels_x, batch_size[1]):
for y_idx in range(0, n_pixels_y, batch_size[2]):
batches.append([c_idx,x_idx,y_idx])
print(f'Number of batches {len(batches)}')
```
%% Cell type:code id: tags:
``` python
def fit_batch(batch_start):
current_result = {}
prev = None
for c_idx, x_idx, y_idx in idx_gen(batch_start, batch_size):
try:
y = hist_data['hist'][c_idx, :, x_idx, y_idx]
if prev is None:
prev, _ = get_starting_parameters(x, y, peak_range, n_peaks=n_peaks_fit)
if fit_range == [0, 0]:
frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
prev[f'g{n_peaks_fit-1}mean'] + prev[f'g{n_peaks_fit-1}sigma'])
else:
frange = fit_range
set_par_limits(prev, peak_range, peak_norm_range,
peak_width_range, n_peaks_fit)
minuit = fit_n_peaks(x, y, prev, frange,
do_minos=do_minos, n_peaks=n_peaks_fit,
fix_d01=fix_peaks, sigma_limit=sigma_limit,)
ndof = np.rint(frange[1]-frange[0])-len(minuit.args) ## FIXME: this line is wrong if fix_peaks is True
current_result['chi2_ndof'] = minuit.fval/ndof
res = minuit.fitarg
if fix_peaks : ## set g2 and g3 mean correctly
for i in range(2,n_peaks_fit):
d = res[f'g1mean'] - res[f'g0mean']
res[f'g{i}mean'] = res[f'g0mean'] + d*i
current_result.update(res)
current_result.update(minuit.get_fmin())
fit_result['chi2_ndof'][c_idx, x_idx, y_idx] = current_result['chi2_ndof']
for key in res.keys():
if key in fit_result:
fit_result[key][c_idx, x_idx, y_idx] = res[key]
fit_result['mask'][c_idx, x_idx, y_idx] = get_mask(current_result,
peak_lim,
d0_lim, chi2_lim,
peak_width_lim)
except Exception as e:
fit_result['mask'][c_idx, x_idx,
y_idx] = BadPixelsFF.FIT_FAILED.value
print(c_idx, x_idx, y_idx, e, traceback.format_exc())
if fit_result['mask'][c_idx, x_idx, y_idx] == 0:
prev = res
else:
prev = None
```
%% Cell type:markdown id: tags:
## Single fit ##
The left plot shows the starting parameters for the fit. The right plot shows the fit result. Errors are evaluated with MINOS.
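The fitted model is a sum of `n_peaks_fit` Gaussians (the noise peak plus the photon peaks); with the amplitude parametrisation assumed here,

$$f(x) = \sum_{i=0}^{n_\mathrm{peaks}-1} n_i \exp\!\left(-\frac{(x - \mu_i)^2}{2\sigma_i^2}\right),$$

where $n_i$, $\mu_i$ and $\sigma_i$ correspond to the fit parameters `g{i}n`, `g{i}mean` and `g{i}sigma`.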
%% Cell type:code id: tags:
``` python
hist = hist_data['hist'][1,:,1, 1]
prev, shapes = get_starting_parameters(x, hist, peak_range, n_peaks=n_peaks_fit)
if fit_range == [0, 0]:
frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
prev[f'g3mean'] + prev[f'g3sigma'])
else:
frange = fit_range
set_par_limits(prev, peak_range, peak_norm_range,
peak_width_range, n_peaks=n_peaks_fit)
minuit = fit_n_peaks(x, hist, prev, frange,
do_minos=True, n_peaks=n_peaks_fit,
fix_d01=fix_peaks,
sigma_limit=sigma_limit,
)
print(minuit.get_fmin())
minuit.print_matrix()
print(minuit.get_param_states())
```
%% Cell type:code id: tags:
``` python
res = minuit.fitarg
if fix_peaks :
for i in range(2,n_peaks_fit):
d = res[f'g1mean'] - res[f'g0mean']
res[f'g{i}mean'] = res[f'g0mean'] + d*i
err = minuit.errors
p = minuit.args
ya = np.arange(0,1e4)
y = gaussian_sum(x,n_peaks_fit, *p)
peak_colors = ['g', 'y', 'b', 'orange']
peak_hist = hist.copy()
d=[]
if sigma_limit > 0 :
sel2 = (np.abs(x - res['g0mean']) < sigma_limit*res['g0sigma']) | \
(np.abs(x - res['g1mean']) < sigma_limit*res['g1sigma']) | \
(np.abs(x - res['g2mean']) < sigma_limit*res['g2sigma']) | \
(np.abs(x - res['g3mean']) < sigma_limit*res['g3sigma'])
peak_hist[~sel2] = 0
valley_hist = hist.copy()
valley_hist[sel2] = 0
d.append({'x': x,
'y': valley_hist.astype(np.float64),
'y_err': np.sqrt(valley_hist),
'drawstyle': 'bars',
'errorstyle': 'bars',
'transparency': '95%',
'errorcoarsing': 3,
'label': 'X-ray Data'
})
htitle = f'X-ray Data, (μ±{sigma_limit:0.1f}σ)'
else :
htitle = 'X-ray Data'
d.append({'x': x,
'y': peak_hist.astype(np.float64),
'y_err': np.sqrt(peak_hist),
'drawstyle': 'bars',
'errorstyle': 'bars',
'errorcoarsing': 3,
'label': htitle,
}
)
d.append({'x': x,
'y': y,
'y2': (hist-y)/np.sqrt(hist),
'drawstyle':'line',
'drawstyle2': 'steps-mid',
'label': 'Fit'
}
)
for i in range(n_peaks_fit):
d.append({'x': x,
'y': gaussian(x, res[f'g{i}n'], res[f'g{i}mean'], res[f'g{i}sigma']),
'drawstyle':'line',
'color': peak_colors[i],
})
d.append({'x': np.full_like(ya, res[f'g{i}mean']),
'y': ya,
'drawstyle': 'line',
'linestyle': 'dashed',
'color': peak_colors[i],
'label': f'peak {i} = {res[f"g{i}mean"]:0.1f} $ \pm $ {err[f"g{i}mean"]:0.2f} ADU' })
```
%% Cell type:code id: tags:
``` python
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(16, 7)
for i, shape in enumerate(shapes):
idx = shape[3]
ax1.errorbar(
x[idx], hist[idx],
np.sqrt(hist[idx]),
marker='+', ls='',
)
yg = gaussian(x[idx], *shape[:3])
l = f'Peak {i}: {shape[1]:0.1f} $ \pm $ {shape[2]:0.2f} ADU'
ax1.plot(x[idx], yg, label=l)
ax1.grid(True)
ax1.set_xlabel("Signal [ADU]")
ax1.set_ylabel("Counts")
ax1.legend(ncol=2)
_ = xana.simplePlot(
d,
use_axis=ax2,
x_label='Signal [ADU]',
y_label='Counts',
secondpanel=True, y_log=False,
x_range=(frange[0], frange[1]),
y_range=(1., np.max(hist)*1.6),
legend='top-left-frame-ncol2',
)
plt.show()
```
%% Cell type:markdown id: tags:
## All fits ##
%% Cell type:code id: tags:
``` python
# Allocate memory for fit results
fit_result = {}
keys = list(minuit.fitarg.keys())
keys = [x for x in keys if 'limit_' not in x and 'fix_' not in x]
keys += ['chi2_ndof', 'mask', 'gain']
for key in keys:
dtype = 'f4'
if key == 'mask':
dtype = 'i4'
fit_result[key] = sharedmem.empty([n_cells, n_pixels_x, n_pixels_y], dtype=dtype)
```
%% Cell type:code id: tags:
``` python
# Perform fitting
with Pool() as pool:
const_out = pool.map(fit_batch, batches)
```
%% Cell type:code id: tags:
``` python
# Evaluate bad pixels
fit_result['gain'] = (fit_result['g1mean'] - fit_result['g0mean'])/photon_energy
# Calculate histogram width and evaluate cut
h_sums = np.sum(hist_data['hist'], axis=1)
hist_norm = hist_data['hist'] / h_sums[:, None, :, :]
hist_mean = np.sum(hist_norm[:, :max_bins, ...] *
x[None, :, None, None], axis=1)
hist_sqr = (x[None, :, None, None] - hist_mean[:, None, ...])**2
hist_std = np.sqrt(np.sum(hist_norm[:, :max_bins, ...] * hist_sqr, axis=1))
fit_result['mask'][hist_std<intensity_lim] |= BadPixelsFF.NO_ENTRY.value
# Bad pixel on gain deviation
gains = np.copy(fit_result['gain'])
gains[fit_result['mask']>0] = np.nan
gain_mean = np.nanmean(gains, axis=(1,2))
fit_result['mask'][fit_result['gain'] > gain_mean[:,None,None]*gain_lim[1] ] |= BadPixelsFF.GAIN_DEVIATION.value
fit_result['mask'][fit_result['gain'] < gain_mean[:,None,None]*gain_lim[0] ] |= BadPixelsFF.GAIN_DEVIATION.value
```
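%% Cell type:markdown id: tags:
The absolute gain per pixel and cell follows from the fitted peak positions as $g\,[\mathrm{ADU/keV}] = (\mu_1 - \mu_0)/E_\gamma$, with $E_\gamma$ the photon energy in keV. Pixels with a histogram width below `intensity_lim` are flagged as `NO_ENTRY`, and pixels whose gain deviates from the per-cell mean by more than the `gain_lim` bounds are flagged as `GAIN_DEVIATION`.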
%% Cell type:code id: tags:
``` python
# Save fit results
os.makedirs(out_folder, exist_ok=True)
out_name = f'{out_folder}/fits_m{module:02d}.h5'
print(f'Save to file: {out_name}')
save_dict_to_hdf5({'data': fit_result}, out_name)
```
%% Cell type:markdown id: tags:
## Summary across cells ##
%% Cell type:code id: tags:
``` python
labels = [
"Noise peak [ADU]",
"First photon peak [ADU]",
f"gain [ADU/keV] $\gamma$={photon_energy} [keV]",
"$\chi^2$/nDOF",
"Fraction of bad pixels",
]
for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']):
fig = plt.figure(figsize=(20,5))
ax = fig.add_subplot(121)
data = fit_result[key]
if key == 'mask':
data = data > 0
vmin, vmax = [0, 1]
else:
vmin, vmax = get_range(data, 5)
_ = heatmapPlot(
np.mean(data, axis=0).T,
add_panels=False, cmap='viridis', use_axis=ax,
vmin=vmin, vmax=vmax, lut_label=labels[i]
)
if key != 'mask':
vmin, vmax = get_range(data, 7)
ax = fig.add_subplot(122)
_ = xana.histPlot(
ax, data.flatten(),
bins=45,range=[vmin, vmax],
log=True,color='red',histtype='stepfilled'
)
ax.set_xlabel(labels[i])
ax.set_ylabel("Counts")
```
%% Cell type:markdown id: tags:
## Histograms of fit parameters ##
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
a = ax.hist(hist_std.flatten(), bins=100, range=(0,100) )
ax.plot([intensity_lim, intensity_lim], [0, np.nanmax(a[0])], linewidth=1.5, color='red' )
ax.set_xlabel('Histogram width [ADU]', fontsize=14)
ax.set_ylabel('Number of histograms', fontsize=14)
ax.set_title(f'{hist_std[hist_std<intensity_lim].shape[0]} histograms below the threshold of {intensity_lim} ADU',
fontsize=14, fontweight='bold')
ax.grid()
ax.set_yscale('log')
```
%% Cell type:code id: tags:
``` python
def plot_par_distr(par):
fig = plt.figure(figsize=(16, 5))
sel = fit_result['mask'] == 0
for i in range(n_peaks_fit) :
data=fit_result[f"g{i}{par}"]
plt_range=(-1,50)
if par =='mean':
plt_range=[peak_range[i][0] ,peak_range[i][1]]
num_bins = int(plt_range[1] - plt_range[0])
ax = fig.add_subplot(1,n_peaks_fit,i+1)
_ = xana.histPlot(ax,data.flatten(),
bins= num_bins,range=plt_range,
log=True,color='red',
label='all fits',)
a = ax.hist(data[sel].flatten(),
bins=num_bins, range=plt_range,
log=True,color='g',
label='good fits only',
)
ax.set_xlabel(f"g{i} {par} [ADU]")
ax.legend()
plot_par_distr('mean')
plot_par_distr('sigma')
```
%% Cell type:code id: tags:
``` python
sel = fit_result['mask'] == 0
dsets = {'d01 [ADU]':fit_result[f"g1mean"]-fit_result[f"g0mean"],
'gain [ADU/keV]':fit_result[f"gain"],
'gain relative to module mean':fit_result[f"gain"]/np.nanmean(gain_mean),
}
fig = plt.figure(figsize=(16,5))
for i, (par, data) in enumerate(dsets.items()):
ax = fig.add_subplot(1, 3, i+1)
plt_range=get_range(data, 10)
num_bins = 100
_ = xana.histPlot(ax,data.flatten(),
bins= num_bins,range=plt_range,
log=True,color='red',
label='all fits',)
a = ax.hist(data[sel].flatten(),
bins=num_bins, range=plt_range,
log=True,color='g',
label='good fits only',
)
ax.set_xlabel(f"{par}")
ax.legend()
if 'd01' in par :
ax.axvline(d0_lim[0])
ax.axvline(d0_lim[1])
if 'rel' in par :
ax.axvline(gain_lim[0])
ax.axvline(gain_lim[1])
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
Mean and median values are calculated across all pixels for each memory cell.
%% Cell type:code id: tags:
``` python
def plot_error_band(key, x, ax):
cdata = np.copy(fit_result[key])
cdata[fit_result['mask']>0] = np.nan
mean = np.nanmean(cdata, axis=(1,2))
median = np.nanmedian(cdata, axis=(1,2))
std = np.nanstd(cdata, axis=(1,2))
mad = np.nanmedian(np.abs(cdata - median[:,None,None]), axis=(1,2))
ax.plot(x, mean, 'k', color='#3F7F4C', label=" mean value ")
ax.plot(x, median, 'o', color='red', label=" median value ")
ax.fill_between(x, mean-std, mean+std,
alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
linewidth=1, linestyle='dashdot', antialiased=True,
label=" mean value $ \pm $ std ")
ax.fill_between(x, median-mad, median+mad,
alpha=0.3, edgecolor='red', facecolor='red',
linewidth=1, linestyle='dashdot', antialiased=True,
label=" median value $ \pm $ mad ")
if f'error_{key}' in fit_result:
cerr = np.copy(fit_result[f'error_{key}'])
cerr[fit_result['mask']>0] = np.nan
meanerr = np.nanmean(cerr, axis=(1,2))
ax.fill_between(x, mean-meanerr, mean+meanerr,
alpha=0.6, edgecolor='#089FFF', facecolor='#089FFF',
linewidth=1, linestyle='dashdot', antialiased=True,
label=" mean fit error ")
x = np.linspace(*cell_range, n_cells)
for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof']):
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
plot_error_band(key, x, ax)
ax.set_xlabel('Memory Cell ID', fontsize=14)
ax.set_ylabel(labels[i], fontsize=14)
ax.grid()
ax.legend()
```
%% Cell type:markdown id: tags:
## Cut flow ##
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
n_bars = 8
x = np.arange(n_bars)
width = 0.3
msk = fit_result['mask']
n_fits = np.prod(msk.shape)
y = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value| BadPixelsFF.GAIN_DEVIATION.value)
]
y2 = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.GAIN_DEVIATION.value)
]
y = (1 - np.sum(y, axis=(1,2,3))/n_fits)*100
y2 = (1 - np.sum(y2, axis=(1,2,3))/n_fits)*100
labels = ['Fit failed',
'Accurate covar',
'Chi2/nDOF',
'Gain',
'Noise peak',
'Peak width',
'No Entry',
'Gain deviation']
ax.bar(x, y2, width, label='Only this cut')
ax.bar(x, y, width, label='Cut flow')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.set_ylim(y[5]-0.5, 100)
ax.grid(True)
ax.legend()
plt.show()
```
......
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 112 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID, set to empty string if it is the same a karabo-id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
overwrite = True # set to True if existing data should be overwritten
relative_gain = True # do relative gain correction
plt_images = 100 # Number of images to plot after applying selected corrections.
limit_images = 0 # ONLY FOR TESTING. Process only the first N images; use 0 to process all.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixed gain. Will be overwritten by the value in the file unless manual_slow_data is set to True.
mem_cells = 0 # leave at 0; the number of memory cells is read from control information (stored since 2019).
bias_voltage = 180 # will be overwritten by value in file
# Parameters for plotting
skip_plots = False # exit after writing corrected files
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import warnings
from functools import partial
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
map_seq_files,
write_compressed_frames,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_dc = RunDirectory(in_folder / f'r{run:04d}')
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
if out_folder.exists() and not overwrite:
raise AttributeError("Output path exists! Exiting")
else:
out_folder.mkdir(parents=True, exist_ok=True)
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
run_dc, karabo_id, karabo_da, sequences)
if not len(mapped_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
for kda, sfiles in mapped_files.items():
for k, f in enumerate(sfiles):
if k == 0:
table.append((fi, kda, k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
try:
this_run_mcells, sc_start = ctrl_data.get_memory_cells()
if this_run_mcells == 1:
memory_cells = 1
print("Run is in single cell mode.\n"
f"Storage cell start: {sc_start:02d}")
else:
memory_cells = 16
print(f"Run is in burst mode.\n"
f"Storage cell start: {sc_start:02d}")
except KeyError as e:
print("WARNING: KeyError while reading number of memory cells.")
if mem_cells == 0:
memory_cells = 1
else:
memory_cells = mem_cells
print(f"WARNING: Set memory cells to {memory_cells}, as "
"it is not saved in control information.")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: "
f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})") # noqa
print(f"Gain mode is {gain_mode}")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
def get_constants_for_module(karabo_da: str):
""" Get calibration constants for given module of Jungfrau
:return:
offset_map (offset map),
mask (mask of bad pixels),
gain_map (map of relative gain factors),
db_module (name of DB module),
when (dictionary mapping constant name to creation time)
"""
when = {}
retrieval_function = partial(
get_constant_from_db_and_time,
karabo_id=karabo_id,
karabo_da=karabo_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
offset_map, when["Offset"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.Offset(),
empty_constant=np.zeros((512, 1024, memory_cells, 3))
)
mask, when["BadPixelsDark"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.BadPixelsDark(),
empty_constant=np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
)
mask_ff, when["BadPixelsFF"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.BadPixelsFF(),
empty_constant=None
)
gain_map, when["Gain"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.RelativeGain(),
empty_constant=None
)
# combine masks
if mask_ff is not None:
mask |= np.moveaxis(mask_ff, 0, 1)
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
mask = np.moveaxis(mask, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
if gain_map is not None:
if memory_cells > 1:
gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
# add extra empty cell constant
b = np.ones((1,) + gain_map.shape[1:])
gain_map = np.concatenate((gain_map, b), axis=0)
else:
gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)
return offset_map, mask, gain_map, karabo_da, when
with multiprocessing.Pool() as pool:
r = pool.map(get_constants_for_module, karabo_da)
# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
print(f'Constants for module {k_da}:')
for const in when:
print(f' {const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
relative_gain = False
constants[k_da] = (offset_map, mask, gain_map)
```
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
d = d.astype(np.float32) # [cells, x, y]
g = gain[index]
# Jungfrau gains 0[00], 1[01], 3[11]
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though it is correct to assume that memory cells pattern
can be the same across all trains (for one correction run
taken with one acquisition), it is preferred to not assume
this to account for exceptions that can happen.
"""
m = memcells[index].copy()
# A cell value of 255 marks a cell without image data (an empty image).
# Such images are corrected with the constants of cell 0 to avoid
# invalid indexing; this relies on the modified memory cell values
# not being stored in the corrected data.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
mask_cell = mask[m, ...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
d -= offset
# Gain correction
if relative_gain:
if memory_cells > 1:
gain_map_cell = gain_map[m, ...]
else:
gain_map_cell = gain_map
cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
d /= cal
msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))
data_corr[index, ...] = d
mask_corr[index, ...] = msk
```
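%% Cell type:markdown id: tags:
The correction above uses `np.choose` to pick, per pixel, the constant belonging to the recorded gain stage. The following minimal sketch shows that lookup in isolation on toy arrays (the shapes and values are illustrative only and not part of the correction):
%% Cell type:code id: tags:
``` python
toy_data = np.array([[1000., 2000.], [3000., 4000.]])  # raw values, [x, y]
toy_gain = np.array([[0, 0], [1, 2]])  # gain bits, already remapped 3 -> 2
# Per-gain constants, [x, y, gain]: 100 (high), 200 (medium), 300 (low).
toy_offset = np.stack([np.full((2, 2), c) for c in (100., 200., 300.)], axis=-1)
# Moving the gain axis to the front lets np.choose index it per pixel.
print(toy_data - np.choose(toy_gain, np.moveaxis(toy_offset, -1, 0)))
# -> [[ 900. 1900.]
#     [2800. 3700.]]
```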
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
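%% Cell type:markdown id: tags:
`pasha` fans a per-train kernel out over worker processes that write into shared-memory output arrays. A self-contained sketch of the same alloc/map pattern used by `correct_train` (toy kernel, unrelated to the actual correction):
%% Cell type:code id: tags:
``` python
demo_context = psh.context.ProcessContext(num_workers=2)
squares = demo_context.alloc(shape=(4,), dtype=np.float64)
def square_kernel(worker_id, index, value):
    # Each worker writes its result into the shared output array.
    squares[index] = value ** 2
demo_context.map(square_kernel, np.arange(4.0))
print(squares)  # -> [0. 1. 4. 9.]
```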
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
fim_data = {}
gim_data = {}
rim_data = {}
msk_data = {}
# Loop over modules
for local_karabo_da, mapped_files_module in mapped_files.items():
instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
data_path = "INSTRUMENT/"+instrument_src_kda+"/data"
for sequence_file in mapped_files_module: # noqa
sequence_file = Path(sequence_file)
seq_dc = H5File(sequence_file)
# Save corrected data in an output file with name
# of corresponding raw sequence file.
ofile_name = sequence_file.name.replace("RAW", "CORR")
out_file = out_folder / ofile_name
# load shape of data for memory cells, and detector size (imgs, cells, x, y)
# dshape[0] = number of available images to correct.
dshape = seq_dc[instrument_src_kda, "data.adc"].shape
if dshape[0] == 0:
print(f"\t- WARNING: No image data for {ofile_name}: data shape is {dshape}")
continue
# Number of trains in the file, including trains with empty data.
n_trains = len(seq_dc.train_ids)
n_imgs = dshape[0]
# For testing, only correct limit_images
if limit_images > 0:
n_imgs = min(n_imgs, limit_images)
print(f"\nNumber of images to correct: {n_imgs} for {ofile_name}")
if n_trains - dshape[0] != 0:
print(f"\t- WARNING: {sequence_file.name} has {n_trains - dshape[0]} "
"trains with empty data.")
# Cap plt_images in case fewer images than requested are available.
plt_images = min(plt_images, n_imgs)
# load constants from the constants dictionary.
offset_map, mask, gain_map = constants[local_karabo_da]
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask_corr = context.alloc(shape=dshape, dtype=np.uint32)
step_timer.start()
seq_dc = seq_dc.select(
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:n_imgs])
data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
rim_data[local_karabo_da] = data[:plt_images, 0, ...].copy()
context.map(correct_train, data)
step_timer.done_step('Correction time.')
step_timer.start()
# Create CORR files and add corrected data sources.
# Exclude raw data images (data/adc)
with h5py.File(out_file, 'w') as ofile:
# Copy RAW non-calibrated sources.
with h5py.File(sequence_file, 'r') as sfile:
h5_copy_except.h5_copy_except_paths(
sfile, ofile, [f"{data_path}/adc"])
# Create datasets with the available corrected data
ddset = ofile.create_dataset(
f"{data_path}/adc",
data=data_corr,
chunks=((1,) + dshape[1:]), # 1 chunk == 1 image
dtype=np.float32,
)
write_compressed_frames(
mask_corr,
ofile,
dataset_path=f"{data_path}/mask",
comp_threads=n_cpus,
)
step_timer.done_step('Saving data time.')
# Prepare plotting arrays
step_timer.start()
# TODO: Print out which cell is being selected for plotting
fim_data[local_karabo_da] = data_corr[:plt_images, 0, ...].copy()
msk_data[local_karabo_da] = mask_corr[:plt_images, 0, ...].copy()
gim_data[local_karabo_da] = gain[:plt_images, 0, ...].copy()
step_timer.done_step(f'Preparing plotting data of {plt_images} images.')
```
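%% Cell type:markdown id: tags:
The corrected files mirror the raw layout, with `data/adc` replaced by float32 corrected values and a compressed `data/mask` dataset added. As a quick sanity check, the last file written above can be inspected (this simply reuses `out_file` and `data_path` from the final loop iteration):
%% Cell type:code id: tags:
``` python
with h5py.File(out_file, "r") as f:
    print("adc: ", f[f"{data_path}/adc"].shape, f[f"{data_path}/adc"].dtype)
    print("mask:", f[f"{data_path}/mask"].shape, f[f"{data_path}/mask"].dtype)
```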
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [
np.min(edges[1]),
np.max(edges[1]),
np.min(edges[0]),
np.max(edges[0]),
]
im = ax.imshow(
data[::-1, :],
extent=extent,
aspect="auto",
norm=LogNorm(vmin=1, vmax=np.max(data))
)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
h, ex, ey = np.histogram2d(
rim_data[mod].flatten(),
gim_data[mod].flatten(),
bins=[100, 4],
range=[[0, 10000], [0, 4]],
)
do_2d_plot(
h, (ex, ey),
"Signal (ADU)",
"Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
f"Module {mod}")
f"Module {mod} ({pdu})")
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per-pixel mean of the RAW data in the sequence file
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.mean(rim_data[mod],axis=0),
vmin=min(0.75*np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
vmax=max(1.5*np.median(rim_data[mod][rim_data[mod] > 0]), 16000),
cmap="jet")
ax.set_title(f'Module {mod} ({pdu})')
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per-pixel mean of the CORR data in the sequence file
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.mean(fim_data[mod],axis=0),
vmin=min(0.75*np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
vmax=max(2.*np.median(fim_data[mod][fim_data[mod] > 0]), 100),
cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Train Preview ###
A single image from the first train
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
fim_data[mod][0, ...],
vmin=min(0.75*np.median(fim_data[mod][0, ...]), -0.5),
vmax=max(2.*np.median(fim_data[mod][0, ...]), 100),
cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(211)
h = ax.hist(
fim_data[mod].flatten(),
bins=1000,
range=(-100, 1000),
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
ax = fig.add_subplot(212)
h = ax.hist(
fim_data[mod].flatten(),
bins=1000,
range=(-1000, 10000),
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per-pixel maximum of the first train of the GAIN data
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.max(gim_data[mod], axis=0),
vmin=0, vmax=3, cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append(
(item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
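%% Cell type:markdown id: tags:
Since each flag occupies a single bit, any mask entry can be decomposed again with a bitwise AND. A small illustration (not part of the correction output):
%% Cell type:code id: tags:
``` python
entry = BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value
set_flags = [item.name for item in BadPixels if item.value & entry]
print(f"{entry:016b} ->", set_flags)
```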
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.log2(msk_data[mod][0,...]),
vmin=0, vmax=32, cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
......
%% Cell type:markdown id: tags:
# Jungfrau Dark Image Characterization #
Author: European XFEL Detector Group, Version: 2.0
Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/' # folder under which runs are located, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # path to place reports at, required
run_high = 141 # run number for G0 dark run, required
run_med = 142 # run number for G1 dark run, required
run_low = 143 # run number for G2 dark run, required
# Parameters used to access raw data.
karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules
karabo_id = 'SPB_IRDA_JF4M' # karabo_id (detector identifier) prefix of Jungfrau detector to process.
karabo_id_control = '' # karabo_id of the control device if it is different; leave empty if it is the same as karabo_id
receiver_template = 'JNGFR{:02}' # inset for receiver devices
instrument_source_template = '{}/DET/{}:daqOutput' # template for instrument source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
# Parameters for calibration database and storing constants.
use_dir_creation_date = True # use dir creation date
cal_db_interface = 'tcp://max-exfl016:8016' # calibration db interface to connect to
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
# Parameters affecting creating dark calibration constants.
badpixel_threshold_sigma = 5. # bad pixels are values further than this many standard deviations from the median
offset_abs_threshold_low = [1000, 10000, 10000] # absolute bad pixel threshold in terms of offset, lower values
offset_abs_threshold_high = [8000, 15000, 15000] # absolute bad pixel threshold in terms of offset, upper values
max_trains = 0 # Maximum number of trains used to process darks. Set to 0 to process all available trains.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
time_limits = 0.025 # when looking up calibration constants later on, the integration time is allowed to vary by this amount (in us)
# Parameters to be used for injecting dark calibration constants.
integration_time = 1000 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixgain1, fixgain2. Will be overwritten by value in file
gain_mode = 0 # 1 if medium and low runs are fixgain1 and fixgain2, otherwise 0. It will be overwritten by the value in the file.
bias_voltage = 90 # sensor bias voltage in V, will be overwritten by value in file
memory_cells = 16 # number of memory cells
# TODO: this is only used for a warning check in the AGIPD dark notebook.
# Need to rethink whether it makes sense to use it here as well.
operation_mode = 'ADAPTIVE_GAIN' # Detector operation mode, optional
# TODO: Remove
db_module = [""] # ID of module in calibration database. TODO: remove from calibration_configurations.
```
%% Cell type:code id: tags:
``` python
import glob
import os
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import pasha as psh
from IPython.display import Markdown, display
from extra_data import RunDirectory
matplotlib.use('agg')
%matplotlib inline
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.histogram import histPlot
from cal_tools import jungfraulib, step_timing
from cal_tools.ana_tools import save_dict_to_hdf5
from cal_tools.enums import BadPixels, JungfrauSettings
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
# Constants relevant for the analysis
run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
sensor_size = (1024, 512)
gains = [0, 1, 2]
fixed_settings = [
JungfrauSettings.FIX_GAIN_1.value, JungfrauSettings.FIX_GAIN_2.value] # noqa
dynamic_settings = [
JungfrauSettings.FORCE_SWITCH_HG1.value, JungfrauSettings.FORCE_SWITCH_HG2.value] # noqa
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time")
os.makedirs(out_folder, exist_ok=True)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
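# e.g. in_folder='/gpfs/exfel/exp/SPB/202130/p900204/raw/' -> proposal='p900204'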
file_loc = f"proposal:{proposal} runs:{run_high} {run_med} {run_low}"
report = get_report(out_folder)
step_timer = step_timing.StepTimer()
```
%% Cell type:markdown id: tags:
## Reading control data
%% Cell type:code id: tags:
``` python
step_timer.start()
gain_runs = dict()
med_low_settings = []
ctrl_src = ctrl_source_template.format(karabo_id_control)
for gain, run_n in enumerate(run_nums):
run_dc = RunDirectory(f"{in_folder}/r{run_n:04d}/")
gain_runs[run_n] = [gain, run_dc]
ctrl_data = jungfraulib.JungfrauCtrl(run_dc, ctrl_src)
run_settings = ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings # noqa
# Read control data for the high gain run only.
if run_n == run_high:
run_mcells, sc_start = ctrl_data.get_memory_cells()
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
print(f"Gain setting is {gain_setting} ({run_settings})")
print(f"Integration time is {integration_time} us")
print(f"Bias voltage is {bias_voltage} V")
if run_mcells == 1:
memory_cells = 1
print('Dark runs in single cell mode, '
f'storage cell start: {sc_start:02d}')
else:
memory_cells = 16
print('Dark runs in burst mode, '
f'storage cell start: {sc_start:02d}')
else:
gain_mode = ctrl_data.get_gain_mode()
med_low_settings.append(run_settings)
# A transparent workaround for old raw data with wrong/missing medium and low gain settings
if med_low_settings == [None, None]:
print("WARNING: run.settings is not stored in the data to read. "
f"Hence assuming gain_mode = {gain_mode} for adaptive old data.")
elif med_low_settings == ["dynamicgain", "forceswitchg1"]:
print(f"WARNING: run.settings for medium and low gain runs are wrong {med_low_settings}. "
f"This is an expected bug for old raw data. Setting gain_mode to {gain_mode}.")
# Validate that med_low_settings is not a mix of adaptive and fixed settings.
elif not (sorted(med_low_settings) in [fixed_settings, dynamic_settings]): # noqa
raise ValueError(
"Medium and low run settings are not as expected. "
f"Either {dynamic_settings} or {fixed_settings} are expected.\n"
f"Got {sorted(med_low_settings)} for both runs, respectively.")
print(f"Gain mode is {gain_mode} ({med_low_settings})")
step_timer.done_step('Reading control data.')
```
%% Cell type:code id: tags:
``` python
# set the operating condition
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
# Use only high gain threshold for all gains in case of fixed_gain.
if gain_mode: # fixed_gain
offset_abs_threshold = [[offset_abs_threshold_low[0]]*3, [offset_abs_threshold_high[0]]*3]
else:
offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]
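# With the defaults above and fixed gain, the lower bounds become
# [1000, 1000, 1000], i.e. the high gain limit is applied to all three stages.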
```
%% Cell type:code id: tags:
``` python
context = psh.context.ThreadContext(num_workers=multiprocessing.cpu_count())
```
%% Cell type:code id: tags:
``` python
"""
All jungfrau runs are taken through one acquisition, except for the forceswitch runs.
While taking non-fixed dark runs, a procedure of multiple acquisitions is used to switch the storage cell indices.
This is done for medium and low gain dark dynamic runs, only [forceswitchg1, forceswitchg2]:
Switching the cell indices in burst mode is a work around for hardware procedure
deficiency that produces wrong data for dark runs except for the first storage cell.
This is why multiple acquisitions are taken to switch the used storage cells and
acquire data through two cells for each of the 16 cells instead of acquiring darks through all 16 cells.
"""
print(f"Maximum trains to process is set to {max_trains}")
noise_map = dict()
offset_map = dict()
bad_pixels_map = dict()
for mod in karabo_da:
step_timer.start()
instrument_src = instrument_source_template.format(
karabo_id, receiver_template.format(int(mod[-2:])))
print(f"\n- Instrument data path for {mod} is {instrument_src}.")
offset_map[mod] = context.alloc(shape=(sensor_size+(memory_cells, 3)), fill=0)
noise_map[mod] = context.alloc(like=offset_map[mod], fill=0)
bad_pixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32, fill=0)
for run_n, [gain, run_dc] in gain_runs.items():
def process_cell(worker_id, array_index, cell_number):
cell_slice_idx = acelltable == cell_number
thiscell = images[..., cell_slice_idx]
offset_map[mod][..., cell_number, gain] = np.mean(thiscell, axis=2)
noise_map[mod][..., cell_number, gain] = np.std(thiscell, axis=2)
# Flag pixels whose gain value, averaged across all trains for this
# cell, deviates from the expected raw gain value.
bad_pixels_map[mod][
np.average(gain_vals[..., cell_slice_idx], axis=2) != raw_g] |= BadPixels.WRONG_GAIN_VALUE.value
print(f"Gain stage {gain}, run {run_n}")
# Number of images in the run; the data shape is (imgs, cells, x, y).
n_imgs = run_dc[instrument_src, "data.adc"].shape[0]
# Number of trains, including trains with empty data.
n_trains = len(run_dc.train_ids)
instr_dc = run_dc.select(instrument_src, require_all=True)
empty_trains = n_trains - n_imgs
if empty_trains != 0:
print(
f"\tWARNING: {mod} has {empty_trains} trains with empty data out of {n_trains} trains at " # noqa
f"{Path(run_dc[instrument_src, 'data.adc'].files[0].filename).parent}.")
if max_trains > 0:
n_imgs = min(n_imgs, max_trains)
print(f"Processing {n_imgs} images.")
# Select only requested number of images to process darks.
instr_dc = instr_dc.select_trains(np.s_[:n_imgs])
if n_imgs < min_trains:
raise ValueError(
f"Less than {min_trains} trains are available in RAW data."
" Not enough data to process darks.")
images = np.transpose(
instr_dc[instrument_src, "data.adc"].ndarray(), (3, 2, 1, 0))
acelltable = np.transpose(instr_dc[instrument_src, "data.memoryCell"].ndarray())
gain_vals = np.transpose(
instr_dc[instrument_src, "data.gain"].ndarray(), (3, 2, 1, 0))
# define gain value as saved in raw gain map
raw_g = 3 if gain == 2 else gain
if memory_cells == 1:
acelltable -= sc_start
# Only for dynamic medium and low gain runs [forceswitchg1, forceswitchg2] in burst mode.
if gain_mode == 0 and gain > 0 and memory_cells == 16:
# Set cells 1..15 to 255, mirroring the receiver, which uses 255
# to indicate a cell without an image.
# The image shape for forceswitchg1 and forceswitchg2 is (1024, 512, 2, trains),
# compared to the expected (1024, 512, 16, trains) for the high gain run.
acelltable[1:] = 255
# Calculate offset and noise maps
context.map(process_cell, range(memory_cells))
step_timer.done_step('Creating offset and noise constants for a module.')
```
%% Cell type:markdown id: tags:
## Offset and Noise Maps ##
Below, offset and noise maps for the high gain stage ($g_0$) are shown, alongside the distribution of these values. One expects block-like structures mapping to the ASICs of the detector.
%% Cell type:code id: tags:
``` python
g_name = ['G0', 'G1', 'G2']
g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
n_range = [(0., 50.), (0., 50.), (0., 50.)]
unit = '[ADCu]'
```
%% Cell type:code id: tags:
``` python
# TODO: Fix plot arrangement and speed for Jungfrau burst mode.
step_timer.start()
for pdu, mod in zip(db_modules, karabo_da):
for g_idx in gains:
for cell in range(0, memory_cells):
f_o0 = heatmapPlot(
np.swapaxes(offset_map[mod][..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label=unit,
aspect=1.,
vmin=g_range[g_idx][0],
vmax=g_range[g_idx][1],
title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})')
fo0, ax_o0 = plt.subplots()
res_o0 = histPlot(
ax_o0, offset_map[mod][..., cell, g_idx],
bins=800,
range=g_range[g_idx],
facecolor='b',
histotype='stepfilled',
)
ax_o0.tick_params(axis='both',which='major',labelsize=15)
ax_o0.set_title(
f'Module pedestal distribution - Cell {cell:02d} - Module {mod} ({pdu})',
fontsize=15)
ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
ax_o0.set_yscale('log')
f_n0 = heatmapPlot(
np.swapaxes(noise_map[mod][..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label= unit,
aspect=1.,
vmin=n_range[g_idx][0],
vmax=n_range[g_idx][1],
title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod}",
title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})",
)
fn0, ax_n0 = plt.subplots()
res_n0 = histPlot(
ax_n0,
noise_map[mod][..., cell, g_idx],
bins=100,
range=n_range[g_idx],
facecolor='b',
histotype='stepfilled',
)
ax_n0.tick_params(axis='both', which='major', labelsize=15)
ax_n0.set_title(
f'Module noise distribution - Cell {cell:02d} - Module {mod} ({pdu})',
fontsize=15)
ax_n0.set_xlabel(
f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
plt.show()
step_timer.done_step('Plotting offset and noise maps.')
```
%% Cell type:markdown id: tags:
## Bad Pixel Map ##
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:
$$
v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
$$
or
$$
v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
$$
Values are encoded in a 32-bit mask; for bad pixels deduced from dark images, the following non-zero entries are relevant:
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
print("{:<30s} {:032b} -> {}".format(bp.name, bp.value, int(bp.value)))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
print_bp_entry(BadPixels.WRONG_GAIN_VALUE)
def eval_bpidx(d):
mdn = np.nanmedian(d, axis=(0, 1))[None, None, :, :]
std = np.nanstd(d, axis=(0, 1))[None, None, :, :]
idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)
return idx
```
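%% Cell type:markdown id: tags:
As a quick illustration on synthetic data (not part of the characterization itself), `eval_bpidx` flags exactly the entries further than `badpixel_threshold_sigma` standard deviations from the per-cell, per-gain median:
%% Cell type:code id: tags:
``` python
demo = np.random.default_rng(42).normal(10, 2, size=(64, 64, 1, 1))
demo[0, 0, 0, 0] = 1e4  # plant a single outlier
print("Flagged entries:", np.argwhere(eval_bpidx(demo)))  # -> [[0 0 0 0]]
```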
%% Cell type:code id: tags:
``` python
step_timer.start()
for pdu, mod in zip(db_modules, karabo_da):
display(Markdown(f"### Badpixels for module {mod} ({pdu}):"))
offset_abs_threshold = np.array(offset_abs_threshold)
bad_pixels_map[mod][eval_bpidx(offset_map[mod])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bad_pixels_map[mod][~np.isfinite(offset_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
bad_pixels_map[mod][eval_bpidx(noise_map[mod])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bad_pixels_map[mod][~np.isfinite(noise_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
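# offset_abs_threshold has shape (2, 3): lower/upper bounds per gain stage,
# broadcast here against the (x, y, cell, gain) axes of the offset map.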
bad_pixels_map[mod][(offset_map[mod] < offset_abs_threshold[0][None, None, None, :]) | (offset_map[mod] > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value # noqa
for g_idx in gains:
for cell in range(memory_cells):
bad_pixels = bad_pixels_map[mod][:, :, cell, g_idx]
fn_0 = heatmapPlot(
np.swapaxes(bad_pixels, 0, 1),
y_label="Row",
x_label="Column",
lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
aspect=1.,
vmin=0, vmax=5,
title=f'G{g_idx} Bad pixel map - Cell {cell:02d} - Module {mod} ({pdu})')
step_timer.done_step('Creating bad pixels constant and plotting it for a module.')
```
%% Cell type:code id: tags:
``` python
# set the operating condition
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:markdown id: tags:
## Inject and save calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
for mod, db_mod in zip(karabo_da, db_modules):
constants = {
'Offset': np.moveaxis(offset_map[mod], 0, 1),
'Noise': np.moveaxis(noise_map[mod], 0, 1),
'BadPixelsDark': np.moveaxis(bad_pixels_map[mod], 0, 1),
}
md = None
for key, const_data in constants.items():
const = getattr(Constants.jungfrau, key)()
const.data = const_data
for parm in condition.parameters:
if parm.name == "Integration Time":
parm.lower_deviation = time_limits
parm.upper_deviation = time_limits
if db_output:
md = send_to_db(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
)
if local_output:
md = save_const_to_h5(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder,
)
print(f"Calibration constant {key} is stored locally at {out_folder}.\n")
print("Constants parameter conditions are:\n")
print(
f"• Bias voltage: {bias_voltage}\n"
f"• Memory cells: {memory_cells}\n"
f"• Integration time: {integration_time}\n"
f"• Gain setting: {gain_setting}\n"
f"• Gain mode: {gain_mode}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n") # noqa
step_timer.done_step("Injecting constants.")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......
......@@ -75,10 +75,9 @@ install_requires = [
"nbparameterise==0.5",
"notebook==6.1.5",
"numpy==1.20.3",
"pasha==0.1.0",
"pasha==0.1.1",
"prettytable==0.7.2",
"princess==0.5",
"psutil==5.9.0",
"pypandoc==1.4",
"python-dateutil==2.8.1",
"pyyaml==5.3",
......@@ -94,7 +93,7 @@ install_requires = [
if "readthedocs.org" not in sys.executable:
install_requires += [
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.1.0", # noqa
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.2.0", # noqa
"XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.7.0", # noqa
]
......
import os
import posixpath
import zlib
from datetime import datetime
from multiprocessing.pool import ThreadPool
from typing import Any, Dict, List, Optional, Tuple
......@@ -24,7 +25,8 @@ from cal_tools.agipdutils import (
)
from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
from cal_tools.h5_copy_except import h5_copy_except_paths
from cal_tools.tools import get_from_db
class AgipdCtrl:
def __init__(
......@@ -87,7 +89,6 @@ class AgipdCtrl:
# float.
# It is desired to lose precision here because the usage is
# about bucketing the rate for managing metadata.
return round(float(self.run_dc[rep_rate_src].as_single_value()), 1)
train_pulses = np.squeeze(
......@@ -103,7 +104,7 @@ class AgipdCtrl:
def get_gain_setting(
self,
creation_time: "datetime.datetime",
creation_time: datetime,
) -> Optional[int]:
"""Retrieve Gain setting.
......@@ -128,11 +129,12 @@ class AgipdCtrl:
return
if "gain.value" in self.run_dc.keys_for_source(self.ctrl_src):
return self.run_dc[self.ctrl_src, "gain.value"].as_single_value()
return int(
self.run_dc[self.ctrl_src, "gain"].as_single_value())
setupr = self.run_dc[self.ctrl_src, "setupr.value"].as_single_value()
setupr = self.run_dc[self.ctrl_src, "setupr"].as_single_value()
pattern_type_idx = self.run_dc[
self.ctrl_src, "patternTypeIndex.value"].as_single_value()
self.ctrl_src, "patternTypeIndex"].as_single_value()
if (setupr == 0 and pattern_type_idx < 4) or (
setupr == 32 and pattern_type_idx == 4):
......@@ -161,9 +163,8 @@ class AgipdCtrl:
"gainModeIndex.value" in self.run_dc.keys_for_source(
self.ctrl_src)
):
return AgipdGainMode(int(self.run_dc[
self.ctrl_src, "gainModeIndex"].as_single_value()))
return AgipdGainMode.ADAPTIVE_GAIN
......@@ -172,36 +173,56 @@ class AgipdCtrl:
karabo_id_control: str,
module: Optional[int] = 0
) -> int:
"""Read the voltage information from the FPGA device of module 0.
"""Read the voltage information from the RUN source of module 0.
Different modules may operate at different voltages.
In practice, they all operate at the same voltage.
As such, it is okay to read a single module's value.
If the FPGA/PSC RUN source is not available, 300 will be returned.
300 was the default bias_voltage value for
MID_DET_AGIPD1M-1 and SPB_DET_AGIPD1M-1.
:param karabo_id_control: The karabo deviceId for the CONTROL device.
:param module: defaults to module 0
:return: voltage, a uint16
"""
# TODO: Add a breaking fix by passing the source and key through
# get_bias_voltage arguments.
if "AGIPD1M" in karabo_id_control:
voltage_src = (
f"{karabo_id_control[:-1]}/PSC/HV",
f"channels.U{module}.measurementSenseVoltage.value")
# TODO: Consider removing this and relying on the voltage value
# from the notebook's first cell instead.
default_voltage = 300
else: # AGIPD500K
voltage_src = (
f"{karabo_id_control}/FPGA/M_{module}",
"highVoltage.actual.value")
default_voltage = None
if (
voltage_src[0] in self.run_dc.all_sources and
voltage_src[1] in self.run_dc.keys_for_source(voltage_src[0])
):
# Use the RUN source for reading the bias voltage value, because
# HED_DET_AGIPD500K2G has a hardware issue that leads to arbitrary
# voltage values being stored in the CONTROL source array,
# e.g. /gpfs/exfel/exp/HED/202230/p900248/raw
return self.run_dc.get_run_value(*voltage_src)
else:
# TODO: Consider removing this and using the notebook value
# for old RAW data instead.
error = ("ERROR: Unable to read bias_voltage from"
f" {voltage_src[0]}/{voltage_src[1].replace('.','/')}.")
if default_voltage:
print(f"{error} Returning {default_voltage} "
"as default bias voltage value.")
else:
raise ValueError(error)
return default_voltage
def get_integration_time(self) -> int:
"""Read integration time from the FPGA device.
......@@ -216,8 +237,8 @@ class AgipdCtrl:
'integrationTime.value' in self.run_dc.keys_for_source(
self.ctrl_src)
):
return int(self.run_dc[
self.ctrl_src, 'integrationTime'].as_single_value())
return 12
......@@ -987,46 +1008,7 @@ class AgipdCorrections:
data=cntsv,
fletcher32=True)
def init_constants(self, cons_data, when, module_idx, variant):
"""
For CI derived gain, a mean multiplication factor of 4.48 compared
to medium gain is used, as no reliable CI data for all memory cells
......@@ -1068,6 +1050,7 @@ class AgipdCorrections:
:param when: A dictionary for the creation time
of each retrieved constant.
:param module_idx: Module index
:param variant: A dictionary for the variant of each retrieved CCV.
:return:
"""
......@@ -1241,23 +1224,31 @@ class AgipdCorrections:
# string of the device name.
cons_data = dict()
when = dict()
variant = dict()
db_module = const_yaml[karabo_da]["physical-detector-unit"]
for cname, mdata in const_yaml[karabo_da]["constants"].items():
base_key = f"{db_module}/{cname}/0"
when[cname] = mdata["creation-time"]
if when[cname]:
with h5py.File(mdata["file-path"], "r") as cf:
cons_data[cname] = np.copy(cf[f"{db_module}/{cname}/0/data"]) # noqa
cons_data[cname] = np.copy(cf[f"{base_key}/data"])
# Set variant to 0 if the attribute is missing
# as for old constants.
if "variant" in cf[base_key].attrs.keys():
variant[cname] = cf[base_key].attrs["variant"]
else:
variant[cname] = 0
else:
# Create empty constant using the list elements
cons_data[cname] = getattr(np, mdata["file-path"][0])(mdata["file-path"][1]) # noqa
self.init_constants(cons_data, when, module_idx, variant)
return when
def initialize_from_db(self, karabo_id: str, karabo_da: str,
cal_db_interface: str,
creation_time: datetime,
memory_cells: float, bias_voltage: int,
photon_energy: float, gain_setting: float,
acquisition_rate: float, integration_time: int,
......@@ -1330,11 +1321,32 @@ class AgipdCorrections:
integration_time=integration_time
)
when = {}
cons_data = {}
variant = {}
for cname, cval in const_dict.items():
condition = getattr(
Conditions, cval[2][0]).AGIPD(**cval[2][1])
cdata, md = get_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=getattr(Constants.AGIPD, cname)(),
condition=condition,
empty_constant=getattr(np, cval[0])(cval[1]),
cal_db_interface=cal_db_interface,
creation_time=creation_time,
verbosity=0,
)
cons_data[cname] = cdata
variant[cname] = md.calibration_constant_version.variant
when[cname] = None
# Read the CCV begin_at timestamp if the constant was retrieved successfully.
if md and md.comm_db_success:
when[cname] = md.calibration_constant_version.begin_at
self.init_constants(cons_data, when, module_idx, variant)
return when
......
......@@ -13,13 +13,13 @@ from time import sleep
from typing import List, Optional, Tuple, Union
from urllib.parse import urljoin
import h5py
import ipykernel
import numpy as np
import requests
import yaml
import zmq
from extra_data import RunDirectory
from iCalibrationDB import ConstantMetaData, Versions
from metadata_client.metadata_client import MetadataClient
from notebook.notebookapp import list_running_servers
......@@ -246,8 +246,7 @@ def get_notebook_name():
def get_run_info(proposal, run):
"""
Return information about run from the MDC
"""Return information about run from the MDC
:param proposal: proposal number
:param run: run number
......@@ -265,66 +264,132 @@ def get_run_info(proposal, run):
base_api_url=mdc_config['base-api-url'],
)
mdc_response = mdc.get_proposal_runs(
proposal_number=proposal, run_number=run)
if mdc_response["success"]:
return mdc_response
else: # empty dictionary for wrong proposal or run.
raise KeyError(mdc_response['app_info'])
def creation_date_metadata_client(
proposal: int, run: int
) -> datetime.datetime:
"""Get run directory creation date from myMDC using metadata client.
using method `get_proposal_runs`.
:param proposal: proposal number e.g. 2656 or 900113.
:param run: run number.
:return Optional[datetime.datetime]: Run creation date.
"""
run_info = get_run_info(proposal, run)
return datetime.datetime.strptime(
run_info['data']['runs'][0]['begin_at'],
"%Y-%m-%dT%H:%M:%S.%f%z",
).astimezone(datetime.timezone.utc)
def creation_date_file_metadata(
dc: RunDirectory
) -> Optional[datetime.datetime]:
"""Get run directory creation date from
METADATA/CreationDate of the oldest file using EXtra-data.
# TODO: update after the DAQ stores the same date as myMDC.
:param dc: EXtra-data DataCollection for the run directory.
:return Optional[datetime.datetime]: Run creation date.
"""
md_dict = dc.run_metadata()
if md_dict["dataFormatVersion"] != "0.5":
oldest_file = sorted(
dc.files, key=lambda x: x.metadata()["creationDate"])[0]
return datetime.datetime.strptime(
oldest_file.metadata()["creationDate"],
"%Y%m%dT%H%M%S%z",
)
else:
print("WARNING: input files contains old datasets. "
"No `METADATA/creationDate` to read.")
def creation_date_train_timestamp(
dc: RunDirectory
) -> Optional[datetime.datetime]:
"""Get creation date from the timestamp of the first train.
:param dc: EXtra-data DataCollection for the run directory.
:return Optional[datetime.datetime]: Run creation date.
"""
creation_date = np.datetime64(
dc.select_trains(np.s_[0]).train_timestamps()[0], 'us').item()
if creation_date is None:
print("WARNING: input files contains old datasets without"
" trains timestamps.")
return None
return creation_date.replace(tzinfo=datetime.timezone.utc)
def get_dir_creation_date(directory: Union[str, Path], run: int,
verbosity: int = 0) -> datetime.datetime:
"""
Return run start time from MyDC.
If not available from MyMDC, retrieve the data from the dataset's metadata
in [directory]/[run] or, if the dataset is older than 2020, from the oldest
file's modified time.
"""Get the directory creation data based on 3 different methods.
1) Return run start time from myMDC. (get_runtime_metadata_client)
2) If myMDC connection is not set,
get the date from the files metadata. (get_runtime_metadata_file)
3) If data files are older than 2020 (dataformatversion == "0.5"),
get the data from the oldest file's modified time.
If the data is not available from either source,
this function will raise a FileNotFoundError.
:param directory: path to a directory which contains runs
(e.g. /gpfs/exfel/data/exp/callab/202031/p900113/raw/).
:param run: run number.
:param verbosity: Level of verbosity (0 - silent)
:return: creation datetime for the directory.
"""
directory = Path(directory)
proposal = int(directory.parent.name[1:])
directory = directory / 'r{:04d}'.format(run)
# Validate the availability of the input folder.
# And show a clear error message, if it was not found.
try:
dc = RunDirectory(directory)
except FileNotFoundError as e:
raise FileNotFoundError(
"- Failed to read creation time, wrong input folder",
directory) from e
try:
return creation_date_metadata_client(proposal, run)
except Exception as e:
if verbosity > 0:
print(e)
cdate = creation_date_train_timestamp(dc)
if cdate is not None:
# Report which method was used to read the creation date.
print("Reading creation_date from input files metadata"
" `METADATA/creationDate`")
else: # It's an older dataset.
print("Reading creation_date from the last modification date "
"of the oldest input file.")
cdate = datetime.datetime.fromtimestamp(
sorted(
[Path(f.filename) for f in dc.files],
key=path.getmtime
)[0].stat().st_mtime,
tz=datetime.timezone.utc,
)
return cdate
def _init_metadata(constant: 'iCalibrationDB.calibration_constant',
......@@ -456,12 +521,11 @@ def get_pdu_from_db(karabo_id: str, karabo_da: Union[str, list],
metadata = _init_metadata(constant, condition, None)
# CalibrationDBRemote expects a string.
if snapshot_at is not None and hasattr(snapshot_at, 'isoformat'):
snapshot_at = snapshot_at.isoformat()
# A random interface is chosen if there is # for address range.
db_interface = get_random_db_interface(cal_db_interface)
pdu_dicts = metadata.retrieve_pdus_for_detector(receiver=db_interface,
......@@ -492,7 +556,7 @@ def get_pdu_from_db(karabo_id: str, karabo_da: Union[str, list],
if i is None:
db_modules.append(None)
else:
db_modules.append(pdu_dicts[i]["pdu_physical_name"])
db_modules.append(pdu_dicts[i]["pdu_physical_name"])
return db_modules
......@@ -602,8 +666,10 @@ def get_from_db(karabo_id: str, karabo_da: str,
f" and data_set_name is {h5path}."
)
with h5py.File(Path(hdf5path, filename), "r") as f:
arr = f[f"{h5path}/data"][()]
metadata.calibration_constant.data = arr
metadata.calibration_constant.data = f[f"{h5path}/data"][()] # noqa
# The variant attribute is missing for old constants.
if "variant" in f[h5path].attrs.keys():
metadata.calibration_constant_version.variant = f[h5path].attrs["variant"] # noqa
if verbosity > 0:
if constant.name not in already_printed or verbosity > 1:
......@@ -625,8 +691,9 @@ def send_to_db(db_module: str, karabo_id: str, constant, condition,
creation_time: Optional[datetime.datetime] = None,
timeout: int = 30000,
ntries: int = 7,
doraise: bool = False,
variant: int = 0):
"""Send new calibration constants and metadata requested to CalDB
:param db_module: database module (PDU/Physical Detector Unit)
:param karabo_id: karabo identifier
......@@ -634,14 +701,16 @@ def send_to_db(db_module: str, karabo_id: str, constant, condition,
:param condition: Calibration condition
:param file_loc: Location of raw data.
:param report_path: xfel-calbrate report path to inject along with
the calibration constant versions to the database.
:param cal_db_interface: Interface string, e.g. "tcp://max-exfl016:8015"
:param creation_time: Latest time for constant to be created
:param timeout: Timeout for zmq request
:param ntries: number of tries to contact the database,
ntries is set to 7 so that if the timeout started
at 30s last timeout will be ~ 1h.
:param doraise: if True raise errors during communication with DB
:param variant: A calibration constant version variant attribute
for the constant file.
"""
success = False
......@@ -670,6 +739,7 @@ def send_to_db(db_module: str, karabo_id: str, constant, condition,
metadata.calibration_constant_version.device_name = db_module
metadata.calibration_constant_version.karabo_da = None
metadata.calibration_constant_version.raw_data_location = file_loc
metadata.calibration_constant_version.variant = variant
if constant.data is None:
raise ValueError(
"There is no data available to "
......
......@@ -13,6 +13,13 @@ def pytest_addoption(parser):
help="Skips tests marked as requiring GPFS access",
)
parser.addoption(
"--no-mdc",
action="store_true",
default=True,
help="Skips tests marked as requiring myMDC access",
)
def pytest_configure(config):
config.addinivalue_line(
......@@ -25,6 +32,11 @@ def pytest_configure(config):
"requires_caldb(): marks skips for tests that require calDB access",
)
config.addinivalue_line(
"markers",
"requires_mdc(): marks skips for tests that require calDB access",
)
@lru_cache()
def server_reachable(server: str = "max-exfl017"):
......@@ -46,3 +58,9 @@ def pytest_runtest_setup(item):
if list(item.iter_markers(name="requires_caldb")) and not server_reachable():
pytest.skip("caldb not available")
if (
list(item.iter_markers(name="requires_mdc")) and
item.config.getoption("--no-mdc")
):
pytest.skip("myMDC not available")
......@@ -5,11 +5,15 @@ from unittest.mock import patch
import numpy as np
import pytest
import zmq
from extra_data import open_run
from iCalibrationDB import Conditions, ConstantMetaData, Constants
from cal_tools.agipdlib import AgipdCorrections, CellRange
from cal_tools.plotting import show_processed_modules
from cal_tools.tools import (
creation_date_file_metadata,
creation_date_metadata_client,
creation_date_train_timestamp,
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
......@@ -31,6 +35,7 @@ WRONG_AGIPD_MODULE = "AGIPD_**"
CAL_DB_INTERFACE = "tcp://max-exfl017:8020"
WRONG_CAL_DB_INTERFACE = "tcp://max-exfl017:0000"
PROPOSAL = 900113
@pytest.fixture
def _agipd_const_cond():
......@@ -57,20 +62,63 @@ def test_show_processed_modules():
@pytest.mark.requires_gpfs
def test_dir_creation_date():
"""This test is based on not connecting to MDC and failing to use
`creation_date_metadata_client()`
"""
folder = '/gpfs/exfel/exp/CALLAB/202031/p900113/raw'
date = get_dir_creation_date(folder, 9983)
assert isinstance(date, datetime)
assert str(date) == '2020-09-23 13:30:45.821262+00:00'
# The following data predates the addition of creation_time in metadata
date = get_dir_creation_date(folder, 9999)
assert isinstance(date, datetime)
assert str(date) == '2019-12-16 07:52:25.196603+00:00'
@pytest.mark.requires_gpfs
def test_raise_dir_creation_date():
folder = '/gpfs/exfel/exp/CALLAB/202031/p900113/raw'
with pytest.raises(FileNotFoundError) as e:
get_dir_creation_date(folder, 4)
assert e.value.args[1] == Path(folder) / 'r0004'
@pytest.mark.requires_mdc
def test_creation_date_metadata_client():
date = creation_date_metadata_client(PROPOSAL, 9983)
assert isinstance(date, datetime)
assert str(date) == '2020-09-23 13:30:00+00:00'
@pytest.mark.requires_gpfs
def test_creation_date_file_metadata():
date = creation_date_file_metadata(open_run(PROPOSAL, 9983))
assert isinstance(date, datetime)
assert str(date) == '2020-09-23 13:30:50+00:00'
# Old run without METADATA/CreationDate
date = creation_date_file_metadata(open_run(PROPOSAL, 9999))
assert date is None
@pytest.mark.requires_gpfs
def test_creation_date_train_timestamp():
date = creation_date_train_timestamp(open_run(PROPOSAL, 9983))
assert isinstance(date, datetime)
assert str(date) == '2020-09-23 13:30:45.821262+00:00'
# Old run without trainId timestamps
date = creation_date_train_timestamp(open_run(PROPOSAL, 9999))
assert date is None
def _call_get_from_db(
......@@ -132,7 +180,7 @@ def _call_send_to_db(
return metadata
@pytest.mark.requires_caldb
@pytest.mark.requires_gpfs
def test_get_from_db_load_data(_agipd_const_cond):
""" Test retrieving calibration constants with get_from_db
......@@ -169,7 +217,7 @@ def test_get_from_db_load_data(_agipd_const_cond):
assert isinstance(md, ConstantMetaData)
@pytest.mark.requires_caldb
@pytest.mark.requires_gpfs
def test_raise_get_from_db(_agipd_const_cond):
""" Test error raised scenarios for get_from_db:"""
......
......@@ -14,6 +14,7 @@ from webservice.webservice import ( # noqa: import not at top of file
run_action,
wait_on_transfer,
get_slurm_partition,
get_slurm_nice
)
......@@ -143,13 +144,15 @@ async def test_wait_on_transfer_exceptions(
('sim', ['DARK', '1', '2', '3', '4'], 1, "success: simulated"),
],
)
async def test_run_action(mode, cmd, retcode, expected, monkeypatch):
job_db = mock.Mock()
async def mock_run_proc_async(*args):
return retcode, b'Submitted job: 42'
monkeypatch.setattr(
webservice.webservice, 'run_proc_async', mock_run_proc_async
)
ret = await run_action(job_db, cmd, mode, 1, 1, 1)
assert ret.lower().startswith(expected)
......@@ -177,3 +180,44 @@ async def test_get_slurm_partition(proposal_number,
ret = await get_slurm_partition(client, action, proposal_number)
assert ret == expected_result
@pytest.mark.asyncio
@pytest.mark.parametrize(
'cycle, num_jobs, expected_result',
[
('202201', 0, 0), ('202201', 10, 3*10**2), # user proposal
('202221', 0, 5), ('202221', 10, 5+3*10**2), # commissioning
]
)
async def test_get_slurm_nice_values(fp, cycle, num_jobs, expected_result):
""" Test get_slurm_nice values."""
fp.register(
['squeue', '-h', '-o', '%.20j', '-p', 'upex-higher', '--me'],
stdout='\n'.join(
[f'correct_SPB_{i}' for i in range(num_jobs)] +
[f'correct_FXE_{i}' for i in range(num_jobs*2)]).encode('ascii'),
returncode=0)
ret = await get_slurm_nice(
'upex-higher', 'SPB', cycle, job_penalty=3, commissioning_penalty=5)
assert ret == expected_result
@pytest.mark.asyncio
async def test_get_slurm_nice_fails(fp):
"""Test corner cases for get_slurm_nice."""
# non-zero returncode
fp.register(
['squeue', '-h', '-o', '%.20j', '-p', 'upex-higher', '--me'],
stdout='', returncode=1)
assert await get_slurm_nice('upex-higher', 'SPB', '202201') == 0
# exfel is special
fp.register(
['squeue', '-h', '-o', '%.20j', '-p', 'exfel', '--me'],
stdout='\n'.join([f'correct_SPB_{i}' for i in range(10)]),
returncode=0)
assert await get_slurm_nice('exfel', 'SPB', '202201') == 0
......@@ -31,7 +31,8 @@ kafka:
correct:
in-folder: /gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/raw
out-folder: /gpfs/exfel/d/proc/{instrument}/{cycle}/p{proposal}/{run}
commissioning-penalty: 1250
job-penalty: 2
cmd : >-
python -m xfel_calibrate.calibrate {detector} CORRECT
--slurm-scheduling {sched_prio}
......@@ -45,7 +46,8 @@ correct:
dark:
in-folder: /gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/raw
out-folder: /gpfs/exfel/u/usr/{instrument}/{cycle}/p{proposal}/dark/runs_{runs}
commissioning-penalty: 1250
job-penalty: 2
cmd: >-
python -m xfel_calibrate.calibrate {detector} DARK
--concurrency-par karabo_da
......
......@@ -669,6 +669,55 @@ async def get_slurm_partition(mdc: MetadataClient,
return partition
async def get_slurm_nice(partition: str, instrument: str,
cycle: Union[int, str], job_penalty: int = 2,
commissioning_penalty: int = 1250) -> int:
"""Compute priority adjustment based on cycle and number of running
jobs.
The nice value is computed with
base_penalty + job_penalty * num_jobs**2
base_penalty is 0 for user proposals and commissioning_penalty
for commissioning proposals. The number of jobs is computed by
calling `squeue` and counting based on job name.
The default penalty values give commissioning proposals a base
penalty equivalent to 25 running jobs (2 * 25**2 = 1250).
:param partition: Partition to run jobs in.
:param instrument: Instrument to run jobs for.
:param cycle: Cycle of proposal to run jobs for.
:param job_penalty: Scaling factor per job, 2 by default.
:param commissioning_penalty: Base penalty for commissioning,
1250 by default.
:return: Nice value to be passed to sbatch --nice
"""
if partition == 'exfel':
return 0 # Don't apply degressive priority on exfel.
# List all names for jobs running in the specified partition.
returncode, job_names = await run_proc_async(
['squeue', '-h', '-o', '%.20j', '-p', partition, '--me'])
if returncode != 0:
logging.error(f'Non-zero return code {returncode} from '
f'`squeue` upon counting number of jobs')
return 0 # Fallback if something went wrong.
# Base value depending on proposal type using cycle, assuming that
# user proposals follow the pattern xxxx0y, while different kinds of
# commissioning proposals use xxxx2y or xxxx3y.
base_nice = 0 if str(cycle)[4] == '0' else commissioning_penalty
# Count number of jobs
num_jobs = sum((1 for job in job_names.decode('ascii').split('\n')
if f'correct_{instrument}' in job))
return base_nice + num_jobs**2 * job_penalty
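# Example (illustrative numbers): a commissioning cycle ('202221') with 10
# matching jobs and the defaults above yields 1250 + 2 * 10**2 = 1450,
# while a user proposal ('202201') with no running jobs yields 0.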
async def update_darks_paths(mdc: MetadataClient, rid: int, in_path: str,
out_path: str, report_path: str):
"""Update data paths in MyMDC to provide Globus access
......@@ -1209,6 +1258,10 @@ class ActionsServer:
ret = []
partition = await get_slurm_partition(self.mdc, action, proposal)
nice = await get_slurm_nice(
partition, instrument, cycle,
commissioning_penalty=self.config[action]['commissioning-penalty'],
job_penalty=self.config[action]['job-penalty'])
# run xfel_calibrate
for karabo_id, dconfig in detectors.items():
......@@ -1216,7 +1269,7 @@ class ActionsServer:
del dconfig['detector-type']
cmd = self.config[action]['cmd'].format(
detector=detector,
sched_prio=nice,
partition=partition,
action=action, instrument=instrument,
cycle=cycle, proposal=proposal,
......