Commits on Source (74)
......@@ -17,7 +17,7 @@ repos:
# If `CI_MERGE_REQUEST_TARGET_BRANCH_SHA` env var is set then this will
# run flake8 on the diff of the merge request, otherwise it will run
# flake8 as it would usually execute via the pre-commit hook
- entry: bash -c 'if [ -z ${CI_MERGE_REQUEST_TARGET_BRANCH_SHA} ]; then (flake8 "$@"); else (git diff $CI_MERGE_REQUEST_TARGET_BRANCH_SHA...$CI_MERGE_REQUEST_SOURCE_BRANCH_SHA | flake8 --diff); fi' --
+ entry: bash -c 'if [ -z ${CI_MERGE_REQUEST_TARGET_BRANCH_SHA} ]; then (flake8 "$@" --max-line-length 88); else (git diff $CI_MERGE_REQUEST_TARGET_BRANCH_SHA...$CI_MERGE_REQUEST_SOURCE_BRANCH_SHA | flake8 --diff --max-line-length 88); fi' --
- repo: https://github.com/myint/rstcheck
rev: 3f92957478422df87bd730abde66f089cc1ee19b # commit where pre-commit support was added
hooks:
......
# Release Notes
## 3.13.0
- [AGIPD][PC] Fixing ValueError.
- [Gotthard2][Dark] Sort dark runs.
- [ePix100][Correct] Correcting one train for epix100 and storing a list of one pulseId.
- [Doc] Change log update with latest releases.
- [Gotthard2][Correct] Add `idx` to `da_to_pdu` to use in picking correct `data_source`.
- [LPD][Dark] Sort Dark runs by Gain.
- [Webservice] Use AW status on myMdC for warnings that prevent launching correction.
- [Webservice] Move list of three gain/run detectors into webservice config yaml.
- [Webservice] add environment bin/ to PATH if not already there.
- Convert request time to local timezone in report.
- Don't keep DEBUG level logging from requests_oauthlib.
- Avoid using `run_metadata()` by default, to not fail for EXDF-v0.5 files.
## 3.12.6
- [Jungfrau][Correct] Avoid NAN disturbance for correction plots scale
- [Timepix] Updating Timepix3 Calibration Pipeline for compatibility with new Karabo Implementation + some general improvements
- [Timepix] Allow pipelines to be part of nodes in DataFile API
- [Timepix] Throw pasha at it
- [Timepix] Store labels assigned to pixels
## 3.12.5
- SPARTA AGIPD Single Module integration
## 3.12.4
- [Gotthard2][Correct] Store mask data properly for gotthard2 25um
- Make the run type regex for skipping JF darks less aggressive
## 3.12.3
- [webservice] Update Kafka config to use EuXFEL broker
......
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
overwrite = False # IGNORED, NEEDED FOR COMPATIBILITY.
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 225 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the image index
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
cal_db_interface = "tcp://max-exfl-cal001:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
cal_db_root = "" # The calibration database root path to access constant files. e.g. accessing constants from the test database /gpfs/exfel/d/cal_tst/caldb_store.
mem_cells = -1 # Number of memory cells used, set to -1 to automatically infer
bias_voltage = -1 # bias voltage, set to -1 to use the value stored in slow data.
acq_rate = -1. # the detector acquisition rate, use -1 to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
max_pulses = [0, 352, 1] # range list [start, end, step] of memory cell indices to be processed within a train; a maximum of 3 list elements is allowed
mem_cells_db = -1 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity, no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels to consider a module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered dark
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask a complete ADC
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
rounding_threshold = 0.5 # the fraction below which values are rounded down; 0.5 gives the standard rounding rule
# Correction Booleans
only_offset = False # Apply only the offset correction. The offset is always applied by default; if True, all other corrections are disabled.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask an entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
round_photons = False # Round to absolute number of photons, only use with gain corrections
# Optional auxiliary devices
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
require_ppu_trigger = False # Optional protection against running without PPU or without triggering trains.
use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'online' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
litframe_device_id = '' # Device ID for a lit frame finder device, empty string for auto detection
energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction
use_xgm_device = '' # DoocsXGM device ID to obtain the actual photon energy; if empty, the operating-condition value is used
# Output parameters
recast_image_data = '' # Cast data to a different dtype before saving
compress_fields = ['gain', 'mask'] # Datasets in image group to compress.
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
cmap = "viridis" # matplolib.colormap for almost all heatmap. Other options ['plasma', 'inferno', 'magma', 'cividis', 'jet', ...]
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
max_tasks_per_worker = 1 # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
import itertools
import math
import multiprocessing
import os
import warnings
from datetime import timedelta
from logging import warning
from pathlib import Path
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import h5py
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import by_id, RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools.restful_config as rest_cfg
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.calcat_interface import (
AGIPD_CalibrationData,
CalCatError,
)
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import agipd_single_module_geometry
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
calcat_creation_time,
map_modules_from_folder,
module_index_to_qm,
write_constants_fragment,
)
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
step_timer = StepTimer()
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries with the bools and arguments for correction and data analysis.
# Here the hierarchy and dependencies of the correction booleans are defined.
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any other corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
corr_bools["round_photons"] = round_photons
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if "AGIPD1M" in karabo_id:
nmods = 16
elif "AGIPD500K" in karabo_id:
nmods = 8
else:
nmods = 1
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
mod_indices = modules if nmods > 1 else [0]
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else: # TODO: fix this with the new CALCAT metadata for module indices.
modules = [int(x[-2:]) for x in karabo_da]
mod_indices = modules if nmods > 1 else [0]
print("Process modules:", ', '.join(module_index_to_qm(x) for x in mod_indices))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
```
%% Cell type:code id: tags:
``` python
if use_ppu_device and use_ppu_device in dc.control_sources:
# Obtain trains to process if using a pulse picker device and it's present.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset
train_ids = []
for train_id in start_train_ids:
n_trains = dc[
use_ppu_device, 'trainTrigger.numberOfTrains'
].select_trains(by_id[[train_id]]).ndarray()[0]
train_ids.extend(list(range(train_id, train_id + n_trains)))
if train_ids:
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif require_ppu_trigger:
raise RuntimeError(f'PPU device {use_ppu_device} not triggered but required, aborting!')
else:
print(f'PPU device {use_ppu_device} not triggered, processing all valid trains')
train_ids = None
elif use_ppu_device:
# PPU configured but not present.
if require_ppu_trigger:
raise RuntimeError(f'PPU device {use_ppu_device} required but not found, aborting!')
else:
print(f'PPU device {use_ppu_device} configured but not found, processing all valid trains')
train_ids = None
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# No PPU configured.
print(f'Processing all valid trains')
train_ids = None
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}")
if acq_rate == -1.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == -1:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == -1:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
if mem_cells is None:
raise ValueError(f"No raw images found for {instrument_src_mod}")
mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: 9.2")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_finder != 'off':
from extra_redu import make_litframe_finder, LitFrameFinderError
if use_litframe_finder not in ['auto', 'offline', 'online']:
raise ValueError("Unexpected value in 'use_litframe_finder'.")
inst = karabo_id_control[:3]
litfrm = make_litframe_finder(inst, dc, litframe_device_id)
try:
get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
r = get_data[use_litframe_finder]()
cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
cell_sel.print_report()
except LitFrameFinderError as err:
warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:code id: tags:
``` python
if round_photons and photon_energy <= 0.0:
if use_xgm_device:
# Try to obtain photon energy from XGM device.
wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']
try:
from scipy.constants import h, c, e
# Read wavelength as a single value and convert to hv.
photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
except ValueError:
            warning('XGM source available but the photon energy varies by more than 1%, '
                    'photon rounding disabled!')
round_photons = False
else:
warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
round_photons = False
elif round_photons:
print(f'Photon energy for rounding: {photon_energy:.3f} keV')
if round_photons and (rounding_threshold <= .0 or 1. <= rounding_threshold):
    warning('Rounding threshold is outside the (0, 1) range; using the standard value of 0.5.')
rounding_threshold = 0.5
```
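%% Cell type:markdown id: tags:
For reference, the rounding rule that `rounding_threshold` describes can be written out directly. The following is a minimal, self-contained sketch of that rule on made-up values, not the pipeline's implementation: a photon count is rounded down whenever its fractional part is below the threshold.
%% Cell type:code id: tags:
``` python
import numpy as np

# Illustrative values only: corrected signal in keV and the 9.2 keV photon energy.
signal_keV = np.array([4.5, 4.7, 9.3, 13.9])
photon_energy_keV = 9.2
threshold = 0.5  # 0.5 reproduces the standard rounding rule

# Fractional parts below the threshold round down, all others round up.
photons = np.floor(signal_keV / photon_energy_keV + (1.0 - threshold))
print(photons)  # [0. 1. 1. 2.]
```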
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
agipd_corr.photon_energy = photon_energy
agipd_corr.rounding_threshold = rounding_threshold
agipd_corr.compress_fields = compress_fields
if recast_image_data:
agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
```
%% Cell type:markdown id: tags:
## Retrieving constants
%% Cell type:code id: tags:
``` python
def get_constants_and_update_metadata(cal_data, main_metadata, constants):
try:
metadata = cal_data.metadata(constants)
for key, value in metadata.items():
main_metadata.setdefault(key, {}).update(value)
except CalCatError as e: # TODO: replace when API errors are improved.
warning(f"CalCatError: {e}")
```
%% Cell type:code id: tags:
``` python
step_timer.start()
# Instantiate agipd_cal with the read operating conditions.
agipd_cal = AGIPD_CalibrationData(
detector_name=karabo_id,
modules=karabo_da,
sensor_bias_voltage=bias_voltage,
memory_cells=mem_cells,
acquisition_rate=acq_rate,
integration_time=integration_time,
source_energy=9.2,
gain_mode=gain_mode,
gain_setting=gain_setting,
event_at=creation_time,
client=rest_cfg.calibration_client(),
    caldb_root=Path(cal_db_root) if cal_db_root else None,
)
# Prepare lists of expected calibrations
dark_constants = ["Offset", "Noise", "BadPixelsDark"]
if not gain_mode: # Adaptive gain
dark_constants.append("ThresholdsDark")
agipd_metadata = agipd_cal.metadata(dark_constants)
agipd_cal.gain_mode = None # gain_mode is not used for gain constants
pc_constants, ff_constants = [], []
if any(agipd_corr.pc_bools):
pc_constants = ["SlopesPC", "BadPixelsPC"]
get_constants_and_update_metadata(
agipd_cal, agipd_metadata, pc_constants)
if agipd_corr.corr_bools.get('xray_corr'):
ff_constants = list(agipd_cal.illuminated_calibrations)
get_constants_and_update_metadata(
agipd_cal, agipd_metadata, ff_constants)
step_timer.done_step("Constants were retrieved in")
print("Preparing constants ("
f"FF: {agipd_corr.corr_bools.get('xray_corr', False)}, "
f"PC: {any(agipd_corr.pc_bools)}, "
f"BLC: {any(agipd_corr.blc_bools)})")
# Display retrieved calibration constants timestamps
agipd_cal.display_markdown_retrieved_constants(metadata=agipd_metadata)
```
%% Cell type:code id: tags:
``` python
# Validate constants availability and exclude modules with no offsets.
for da, calibrations in agipd_metadata.items():
mod = modules[karabo_da.index(da)]
# Constants to error out for when missing.
error_missing_constants = {"Offset"}
if not gain_mode:
error_missing_constants |= {"ThresholdsDark"}
error_missing_constants -= set(calibrations)
if error_missing_constants:
warning(f"Offset constant is not available to correct {da}.")
# Remove module from files to process.
del mapped_files[module_index_to_qm(mod)]
karabo_da.remove(da)
modules.remove(mod)
warn_missing_constants = set(dark_constants + pc_constants + ff_constants)
warn_missing_constants -= error_missing_constants
warn_missing_constants -= set(calibrations)
if warn_missing_constants:
warning(f"Constants {warn_missing_constants} were not retrieved for {da}.")
if not mapped_files: # Offsets are missing for all modules.
raise Exception("Could not find offset constants for any modules, will not correct data.")
```
%% Cell type:code id: tags:
``` python
# Record constant details in YAML metadata
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=agipd_metadata,
caldb_root=agipd_cal.caldb_root)
```
%% Cell type:code id: tags:
``` python
# Load calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
def load_constants(da, module):
"""
Initialize constants data from previously retrieved metadata.
Args:
da (str): Data Aggregator (Karabo DA)
module (int): Module index
Returns:
(int, dict, str): Module index, {constant name: creation time}, Karabo DA
"""
const_data = dict()
variant = dict()
for cname, mdata in agipd_metadata[da].items():
dataset = mdata["dataset"]
with h5py.File(agipd_cal.caldb_root / mdata["path"], "r") as cf: # noqa
const_data[cname] = np.copy(cf[f"{dataset}/data"])
variant[cname] = cf[dataset].attrs["variant"] if cf[dataset].attrs.keys() else 0 # noqa
agipd_corr.init_constants(const_data, module, variant)
step_timer.start()
with multiprocessing.Pool(processes=len(modules)) as pool:
pool.starmap(load_constants, zip(karabo_da, modules))
step_timer.done_step('Constants were loaded in')
```
%% Cell type:code id: tags:
``` python
# Store timestamps for Offset, SlopesPC, and SlopesFF
# in YAML file for time-summary table.
timestamps = {}
for mod, mod_mdata in agipd_metadata.items():
modno = int(mod[-2:])
module_timestamps = {}
    # Store a few timestamps if they exist,
    # adding "NA" to keep the array structure.
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if key in mod_mdata:
module_timestamps[key] = mod_mdata[key]["begin_validity_at"]
else:
module_timestamps[key] = "NA"
timestamps[module_index_to_qm(modno)] = module_timestamps
seq = sequences[0] if sequences else 0
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
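%% Cell type:markdown id: tags:
A quick sanity check of the two helpers above on illustrative numbers (`imagewise_chunks` picks up the global `chunk_size` of 1000 set in the parameters):
%% Cell type:code id: tags:
``` python
# batches() groups the file list into per-iteration batches:
print(list(batches(list(range(5)), 2)))  # [[0, 1], [2, 3], [4]]
# imagewise_chunks() splits each file's images into near-equal chunks:
print(list(imagewise_chunks([2500])))    # [(0, 0, 833), (0, 833, 1666), (0, 1666, 2500)]
```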
%% Cell type:code id: tags:
``` python
step_timer.start()
if max_tasks_per_worker == -1:
max_tasks_per_worker = None
with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
        step_timer.done_step('Loading data from files')
        if not any(img_counts):
            # Skip any further processing and output if there are no images to
            # correct in this batch of files.
            continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
            # If common mode correction is enabled, cell selection
            # is only applied after the common mode correction.
            # Perform the cross-file correction in parallel over ASICs.
image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
pool.starmap(agipd_corr.cm_correction, itertools.product(
image_files_idx, range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise correction"
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title=""):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None, mod_starts_at=0):
"""Load single train for all module
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
try:
run_data = RunDirectory(data_folder, include)
except FileNotFoundError:
warning(f'No corrected files for {include}. Skipping plots.')
import sys
sys.exit(0)
if tid is not None:
tid, data = run_data.select(
f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
else:
# A first full trainId for all available modules is of interest.
tid, data = next(run_data.select(
f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules,
starts_at=mod_starts_at,
)
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if "AGIPD500K" in karabo_id:
geom = AGIPD_500K2GGeometry.from_origin()
elif "AGIPD1M" in karabo_id:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
else: # single module AGIPD detector
geom = agipd_single_module_geometry()
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
mod_starts_at = 0 if nmods > 1 else modules[0] # TODO: use CALCAT metadata for the detector.
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods, mod_starts_at=mod_starts_at)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0, mod_starts_at=mod_starts_at)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
raw_float = raw.astype(np.float32)
signal = raw[:, 0, ...]
gain = raw[:, 1, ...]
hist, bins_x, bins_y = calgs.histogram2d(
signal.flatten().astype(np.float32),
gain.flatten().astype(np.float32),
bins=(100, 100),
range=[
np.percentile(signal, [0.02, 99.8]),
np.percentile(gain, [0.02, 99.8]),
],
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows the signal vs. the digitized gain.
%% Cell type:code id: tags:
``` python
vmin, vmax = np.nanmin(corrected), np.nanmax(corrected)
hist, bins_x, bins_y = calgs.histogram2d(
corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[
        # The range boundaries are decided by the detector experts.
[max(vmin, -50), min(vmax, 8192)],
[0, 3]
],
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
def clamp(value, min_value, max_value):
return max(min_value, min(value, max_value))
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
vmin, vmax = mean_data.min(), mean_data.max()
hist, bins_x, bins_y = calgs.histogram2d(
mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[clamp(vmin, -50, -0.2), min(vmax, 1000)], pulse_range],
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID")
if vmax > 1000: # a zoom out plot.
hist, bins_x, bins_y = calgs.histogram2d(
mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[clamp(vmin, -50, -0.2), min(vmax, 20000)], pulse_range]
)
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID (Extended View)")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(np.nanpercentile(corrected_ave/10**6, [2, 98]))
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = np.percentile(data, [5, 95])
ax = geom.plot_data_fast(data, ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
pass
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = np.percentile(raw[cell_idx_preview, 0, ...], [5, 95])
ax = geom.plot_data_fast(
raw[cell_idx_preview, 0, ...], ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
pass
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmax = np.nanpercentile(data, 99.8)
ax = geom.plot_data_fast(data, ax=ax, vmin=max(-50, np.nanmin(data)), vmax=vmax, cmap=cmap)
pass
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmax = np.nanpercentile(corrected[cell_idx_preview], 99.8)
ax = geom.plot_data_fast(
corrected[cell_idx_preview],
ax=ax,
vmin=max(-50, np.nanmin(corrected[cell_idx_preview])),
vmax=vmax,
cmap=cmap,
)
pass
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
plt.title(f'Log-scaled histogram for corrected data for cell {cell_idx_preview}')
pass
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax = 50000
nbins = int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
plt.title(f'Overlaid Histograms for corrected data for multiple gains')
pass
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(
np.max(gains, axis=0), ax=ax,
cmap=cmap, vmin=-0.3, vmax=2.3) # Extend cmap for wrong gain values.
pass
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
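%% Cell type:markdown id: tags:
Since the entries are bit flags, a combined mask value can be decomposed back into its constituent bad-pixel types. A small illustrative sketch (the chosen entry is arbitrary):
%% Cell type:code id: tags:
``` python
# Decode one combined 32-bit mask entry into the BadPixels flags it contains.
value = int(mask[cell_idx_preview].max())  # an example entry from the data above
flags = [bp.name for bp in BadPixels if value & bp.value]
print(f"0b{value:032b} -> {flags}")
```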
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(
np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap=cmap)
pass
```
%% Cell type:code id: tags:
``` python
if round_photons:
display(Markdown('### Photonization histograms ###'))
x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)
fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
ax.set_yscale('log')
ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
ax.set_xlim(x_postround[0], x_postround[-1]+1)
ax.set_xlabel('Photon energy / keV')
ax.set_ylabel('Intensity')
ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')
phx = ax.twiny()
phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
phx.set_xticks(x_photons)
phx.set_xlabel('# Photons')
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(
np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(
np.mean(cm>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
pass
```
......
%% Cell type:markdown id: tags:
# Characterize AGIPD Pulse Capacitor Data #
Author: Detector Operations Group, Version 1.0
The following code characterizes the AGIPD gain via data taken with the pulse capacitor source (PCS). The PCS allows scanning through the high and medium gains of AGIPD by successively increasing the number of charge pulses from an on-ASIC capacitor, thus increasing the charge a pixel sees in a given integration time.
Because the induced charge does not originate from X-rays on the sensor, the gains evaluated here will later need to be rescaled with gains deduced from X-ray data.
PCS data is organized into multiple runs, as the on-ASIC current source cannot supply all pixels of a given module with charge at the same time. Hence, only certain pixel rows will have seen charge in a given image. These rows first need to be combined into single module images again. This notebook uses the new style of merging, which does not support the old data format.
We then use a K-means clustering algorithm to identify components in the resulting per-pixel data series, corresponding to three general regions:
* a high gain slope
* a transition region, where gain switching occurs
* a medium gain slope.
The same regions are present in the gain-bit data and are used to deduce the switching threshold.
The resulting slopes are then fitted with a linear function and a combination of a linear and an exponential decay function to determine the relative gains of the pixels with respect to the module. Additionally, we deduce masks for bad pixels from the data.
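As an aside, the clustering idea can be illustrated on a synthetic pulse capacitor ramp. The sketch below is not this notebook's implementation (which uses the slope-threshold logic in `calc_m_cluster2` further down); it only shows how k-means on local slope estimates separates the three regions, and all numbers are made up.
%% Cell type:code id: tags:
``` python
import numpy as np
from sklearn.cluster import KMeans

# Synthetic ramp: steep high-gain slope, falling transition, shallow medium-gain slope.
x = np.arange(300, dtype=np.float64)
y = np.where(
    x < 120, 40.0 * x,
    np.where(x < 160, 4800.0 - 30.0 * (x - 120.0), 3600.0 + 4.0 * (x - 160.0)),
)

# Local slope estimate: finite differences smoothed over a 15-sample window.
win = 15
slopes = np.convolve(np.gradient(y, x), np.ones(win) / win, mode="same")

# Cluster the per-point slope estimates into three regions.
labels = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(slopes.reshape(-1, 1))
for lbl in np.unique(labels):
    print(f"cluster {lbl}: mean slope {slopes[labels == lbl].mean():8.1f} "
          f"({np.sum(labels == lbl)} points)")
```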
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = '/gpfs/exfel/exp/SPB/202331/p900376/raw' # path to input data, required
out_folder = "/gpfs/exfel/exp/HED/202430/p900438/scratch/rovensky/pc" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
runs = [61, 62, 63, 64, 65, 66, 67, 68] # runs to use, required, range allowed
modules = [-1] # modules to work on, required, range allowed
karabo_da = ["all"]
karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
karabo_id = "SPB_DET_AGIPD1M-1"
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
receiver_template = "{}CH0" # inset for receiver devices
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
cal_db_timeout = 3000000 # timeout on caldb requests
cal_db_interface = "tcp://max-exfl-cal001:8019" # the database interface to use
local_output = True # output constants locally
db_output = False # output constants to database
bias_voltage = -1 # detector bias voltage, negative values for auto-detection.
mem_cells = -1 # number of memory cells used, negative values for auto-detection.
acq_rate = -1. # the detector acquisition rate, negative values for auto-detection.
gain_setting = -1 # gain setting can have value 0 or 1, negative values for auto-detection.
integration_time = -1 # integration time, negative values for auto-detection.
FF_gain = 7.3 # ADU/keV, gain from FF to convert ADU to keV
fit_hook = False # fit a hook function to the medium gain slope; False runs without the hook
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
save_plots = False # set to True if you desire saving plots to output folder
hg_range = [30,170]#[0,-150] # range for linear fit. If upper edge is negative use clustering result (bound_hg - 20)
mg_range = [240,590]#[-350,600] # range for linear fit. If lower edge is negative use clustering result (bound_mg + 20)
sigma_dev_cut = 5.0 # parameters outside the range median +- sigma_dev_cut*MAD are replaced with the median
```
%% Cell type:code id: tags:
``` python
# imports, usually no need to change anything here
import os
import warnings
from datetime import datetime, timedelta
from functools import partial
from dateutil import parser
warnings.filterwarnings('ignore')
import h5py
import matplotlib
import numpy as np
from ipyparallel import Client
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib.colors import LogNorm
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import AxesGrid
%matplotlib inline
import XFELDetAna.xfelpyanatools as xana
from extra_data import RunDirectory
from cal_tools.agipdlib import AgipdCtrl
from cal_tools.enums import BadPixels
from cal_tools.plotting import plot_badpix_3d
from cal_tools.tools import (
calcat_creation_time,
get_constant_from_db_and_time,
get_dir_creation_date,
module_index_to_qm,
)
from iCalibrationDB import Conditions, Constants
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
view = Client(profile=cluster_profile)[:]
view.use_dill()
```
%% Cell type:code id: tags:
``` python
cells = mem_cells
path_temp = in_folder+"/r{:04d}/"
image_name_temp = 'RAW-R{:04d}-AGIPD{:02d}-S{:05d}.h5'
print("Parameters are:")
if mem_cells < 0:
print("Memory cells: auto-detection on")
else:
print("Memory cells set by user: {}".format(mem_cells))
print("Runs: {}".format(runs))
print("Modules: {}".format(modules))
# Get operation conditions
ctrl_source = ctrl_source_template.format(karabo_id_control)
run_folder = f'{in_folder}/r{runs[0]:04d}/'
raw_dc = RunDirectory(run_folder)
instrument_src_mod = list(raw_dc.detector_sources)[0]
instrument = karabo_id.split("_")[0]
if instrument == "HED":
nmods = 8
else:
nmods = 16
print(f"Detector in use is {karabo_id}")
agipd_cond = AgipdCtrl(
run_dc=RunDirectory(f'{in_folder}/r{runs[0]:04d}'),
image_src=instrument_src_mod,
ctrl_src=ctrl_source,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
if karabo_da == ["all"]:
if modules[0] == -1:
modules = list(range(nmods))
strings = [s.split("/")[-1] for s in list(raw_dc.detector_sources)]
modules = [int(s[:s.index("C")]) for s in strings]
modules.sort()
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
first_run = runs[0]
channel = 0
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template).format(channel)
agipd_cond = AgipdCtrl(
run_dc=RunDirectory(f'{in_folder}/r{first_run:04d}'),
image_src=instrument_src,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
print("Runs: {}".format(runs))
print("Modules: {}".format(modules))
print(f"Detector in use is {karabo_id}")
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = calcat_creation_time(in_folder, runs[0], creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}\n")
# Read AGIPD parameter conditions.
if acq_rate < 0.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells < 0:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting < 0:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage < 0:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time < 0:
integration_time = agipd_cond.get_integration_time()
print(f"Acquisition rate: {acq_rate} MHz")
print(f"Memory cells: {mem_cells}")
print(f"Gain setting: {gain_setting}")
print(f"Integration time: {integration_time}")
print(f"Using {creation_time} as creation time of constant.")
```
%% Cell type:markdown id: tags:
## Read in data and merge ##
The new merging cannot handle the old data formats before Dec. 2017.
%% Cell type:code id: tags:
``` python
def merge_data(runs, in_folder, karabo_id, channel):
in_folder = in_folder+"/r{:04d}/"
def count_min_bursts(in_folder, runs, channel):
bursts = np.zeros(len(runs))
for i, run in enumerate(runs):
run_folder_path = in_folder.format(run)
r = RunDirectory(run_folder_path)
instrument_source = karabo_id+'/DET/{}CH0:xtdf'.format(channel)
c = r.get_array(instrument_source, 'image.length')
total_frame = np.count_nonzero(c)
cells = r.detector_info(instrument_source)['frames_per_train']
bursts[i] = total_frame // cells
bursts_total = np.min(bursts).astype(int)
return bursts_total, cells
bursts_total, cells = count_min_bursts(in_folder, runs, channel)
pc_data = {'analog': np.zeros((bursts_total, cells, 128, 512)),
'digital': np.zeros((bursts_total, cells, 128, 512)),
'cellID': np.zeros(((bursts_total) * cells))
}
for counter, run in enumerate(runs):
run_folder_path = in_folder.format(run)
print('Run: ', run_folder_path)
r = RunDirectory(run_folder_path)
instrument_source = karabo_id+'/DET/{}CH0:xtdf'.format(channel)
print('Module: ', instrument_source)
d = r.get_array(instrument_source, 'image.data')
d = d.values.reshape((d.shape[0] // cells, cells, 2, 512, 128))
d = np.moveaxis(d, 4, 3)
c = r.get_array(instrument_source, 'image.cellId')
print('Bursts: {}, Mem cells: {}\n'.format(c.shape[0] // cells, cells))
for i in range(7 - counter, 64, 8): # first run starts in row 7 and then decreases in each run by 1
pc_data['analog'][..., i, :] = d[:bursts_total, :, 0, i, :]
pc_data['digital'][..., i, :] = d[:bursts_total, :, 1, i, :]
        for i in range(64 + counter, 128, 8): # the module is split into two halves to capture the injection pattern correctly
pc_data['analog'][..., i, :] = d[:bursts_total, :, 0, i, :]
pc_data['digital'][..., i, :] = d[:bursts_total, :, 1, i, :]
if counter == 0:
pc_data['cellID'][...] = c[:bursts_total * cells, 0]
del d
del c
return pc_data['analog'], pc_data['digital'], pc_data['cellID']
```
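%% Cell type:markdown id: tags:
The row-interleaving pattern handled by `merge_data` can be sanity-checked on its own. This small sketch simply lists which rows each of the eight runs supplies, following the pattern described in the comments above; together the runs cover all 128 rows exactly once:
%% Cell type:code id: tags:
``` python
covered = set()
for counter in range(8):
    lower = list(range(7 - counter, 64, 8))    # first half: starts at row 7, shifts down per run
    upper = list(range(64 + counter, 128, 8))  # second half: starts at row 64, shifts up per run
    print(f"run index {counter}: rows {lower[:3]} ... and {upper[:3]} ...")
    covered.update(lower + upper)
assert covered == set(range(128))
```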
%% Cell type:code id: tags:
``` python
start = datetime.now()
p = partial(merge_data, runs, in_folder, karabo_id)
# chunk this a bit, so that we don't overuse available memory
res = list(map(p, modules))
end = datetime.now() - start
print('Duration of merging: ', end)
```
%% Cell type:markdown id: tags:
## Plotting of merged PC signal
%% Cell type:markdown id: tags:
The merged image of the injected pulse capacitor signal is plotted here for selected bursts and a memory cell.
The left side shows images of the analog signal, while the right side shows images of the digital signal. Each row visualizes the signal for a given burst.
%% Cell type:code id: tags:
``` python
def PC_image_plotting(mod_data, burst_of_interest, cell_of_interest):
fig, axs = plt.subplots(len(burst_of_interest), 2, figsize=(10, 9))
fig.subplots_adjust(left=0.02, bottom=0.06, right=0.95, top=0.94, wspace=0.25)
yticks_major=np.arange(0,129,64)
xticks_major=np.arange(0,512,64)
for img_type in range(0,2):
if img_type == 0:
title = 'PC signal (ADU)'
img_title = 'Analog'
scale_min = 5000
scale_max = 11000
else:
title = 'PC gain signal (ADU)'
img_title = 'Digital'
scale_min = 4500
scale_max = 8500
for x, b, in enumerate(burst_of_interest):
            im1 = axs[x, img_type].imshow(mod_data[img_type][b, cell_of_interest, ...], interpolation='nearest',
                                          cmap='jet', origin='lower', vmin=scale_min, vmax=scale_max, aspect='equal')
cbar = fig.colorbar(im1, ax=axs[x, img_type], fraction=0.1, pad=0.2, orientation='horizontal')
cbar.set_label(title)
axs[x, img_type].set_title("{}, Burst: {}, Cell: {}".format(img_title, b, cell_of_interest))
axs[x,img_type].set_xticks(xticks_major)
axs[x,img_type].xaxis.grid(True, which='major', linewidth=0.5, color='grey')
axs[x,img_type].set_yticks(yticks_major)
axs[x,img_type].yaxis.grid(True, which='major', linewidth=0.5, color='grey')
axs[x,img_type].set_xlabel('Columns')
axs[x,img_type].set_ylabel('Rows')
plt.show()
return fig
```
%% Cell type:markdown id: tags:
### PC signal of a selected cell and bursts
%% Cell type:code id: tags:
``` python
burst_of_interest = [100, 200, 550]
cell_of_interest = 4
for i, module in enumerate(modules):
    # res holds one (analog, digital, cellID) tuple per module
    fig = PC_image_plotting(res[i], burst_of_interest, cell_of_interest)
```
%% Cell type:markdown id: tags:
## Slope clustering and Fitting ##
The cells below contain the actual algorithm logic, as well as a preview of a single pixel and memory cell visualizing the data and the concepts.
We start by estimating the slope in the vicinity of each data value. This is done by calculating the slopes between a given value and each of its 15 nearest neighbours and averaging the result. Values are then clustered by these slopes into three regions via a K-means algorithm.
* for the first region a linear function is fitted to the data, determining the gain slope and offset for the high gain mode.
$$y = mx + b$$
* for the second and third region a composite function (so-called hook function) of the form:
$$y = A e^{-(x-O)/C} + mx + b$$
is fitted, covering both the transition region and the medium gain slope.
If the variable {fit_hook} is set to False (default), only the medium gain region is fitted with a linear function and the transition region is omitted from the fitting procedure.
Clustering results are not used if {hg_range} or {mg_range} are defined as positive integers, which sets the borders for the high gain region (hg_range) or the medium gain region (mg_range). Data inside each region are fitted with a linear function. Fitting the hook function is not possible in this case.
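To make the hook shape concrete, here is a self-contained sketch that fits the same functional form to synthetic data with `scipy.optimize.curve_fit`. It only illustrates the model; the notebook's actual fits use the Minuit-based `fit_data` defined below, and all numbers here are made up.
%% Cell type:code id: tags:
``` python
import numpy as np
from scipy.optimize import curve_fit

def hook(x, a, c, o, m, b):
    # A * exp(-(x - O)/C) + m*x + b: exponential decay settling onto a linear slope
    return a * np.exp(-(x - o) / c) + m * x + b

x = np.linspace(0.0, 400.0, 200)
true_pars = (50.0, 40.0, 150.0, 4.0, 3000.0)
rng = np.random.default_rng(0)
y = hook(x, *true_pars) + rng.normal(0.0, 5.0, x.size)

popt, _ = curve_fit(hook, x, y, p0=(40.0, 30.0, 140.0, 3.0, 2900.0))
print(np.round(popt, 1))  # should come out close to true_pars
```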
%% Cell type:code id: tags:
``` python
from iminuit import Minuit
from iminuit.util import describe, make_func_code
def rolling_window(a, window):
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
strides = a.strides + (a.strides[-1],)
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
reg1 = m > r1
reg2 = m < r2
reg3 = (m > r2) & (m < r3)
reg4 = ~(reg1 | reg2 | reg3)
labels = [reg1, reg2, reg3, reg4]
regions = np.zeros_like(x, np.uint8)
for r, lbl in enumerate(labels):
regions[lbl] = r
scan_range = 30
mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
# change from np.nan to -1
regions[...] = -1
regions[scan_range//2:-scan_range//2+1] = mregions
labels = [regions == 0, regions == 1, regions == 2, regions == 3]
idx = np.arange(x.size)
maxlbl = x.size-1
for i in range(0, len(labels)-1):
nidx = labels[i+1]
if np.any(nidx):
maxlbl = np.max(idx[nidx])
cidx = idx > maxlbl
if np.any(cidx):
labels[i][cidx] = False
ms = []
for lbl in labels:
xl = x[lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, labels, None
def fit_data(fun, x, y, yerr, par_ests):
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 0
f_sig = describe(fun)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x[y != 0]
self.y = y[y != 0]
self.err = err[y != 0]
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
wrapped = _Chi2Functor(fun, x, y, yerr)
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
return m.values
def lin_fun(x, m, b):
return m*x+b
def hook_fun(x, a, c, o, m, b):
return a*np.exp(-(x-o)/c)+m*x+b
```
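%% Cell type:markdown id: tags:
As a quick sanity check of the chi-square wrapper above, `fit_data` can be exercised on synthetic data (a sketch; the "true" parameters are arbitrary and should be recovered within the noise):
%% Cell type:code id: tags:
``` python
# Synthetic linear data with Gaussian noise; the fit should recover m~12, b~300.
xs = np.linspace(1, 100, 50)
ys = lin_fun(xs, 12.0, 300.0) + np.random.normal(0, 5, xs.size)
errs = np.full(xs.size, 5.0)
fitted = fit_data(lin_fun, xs, ys, errs, {'m': 10.0, 'b': 0.0})
print(fitted['m'], fitted['b'])
```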
%% Cell type:code id: tags:
``` python
from cal_tools.tools import get_constant_from_db_and_time
dark_condition = Conditions.Dark.AGIPD(
    memory_cells=mem_cells,
    bias_voltage=bias_voltage,
    acquisition_rate=acq_rate,
    gain_setting=gain_setting,
    integration_time=integration_time)
offsets = {}
noises = {}
thresholds = {}
for k_da, mod in zip(karabo_da, modules):
    # Retrieve the three dark constants under identical conditions.
    for constant, store in [(Constants.AGIPD.Offset(), offsets),
                            (Constants.AGIPD.Noise(), noises),
                            (Constants.AGIPD.ThresholdsDark(), thresholds)]:
        const, when = get_constant_from_db_and_time(
            karabo_id, k_da, constant, dark_condition,
            np.zeros((128, 512, mem_cells, 3)),
            cal_db_interface, creation_time=creation_time)
        store[mod] = np.array(const.data)
```
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range1 = [(0,16), (0,64)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38, 64, 128]#, 200, 249]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
from mpl_toolkits.axes_grid1 import ImageGrid
for mod, r in zip(modules, res):
ana, dig, cellId = r
d = []
d2 = []
d3 = []
H = [0, 0, 0, 0]
ex, ey = None, None
offset = offsets[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(ana.shape[0])
y = ana[:,cell, pix[0], pix[1]]
yd = dig[:,cell, pix[0], pix[1]]
thresh_trans = thresholds[mod][pix[0], pix[1], cell,0]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
yd = yd[vidx]
if x.shape[0] == 0:
continue
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
ymin = y.min()
#### Transition region gain is set based on dark thresholds. ####
msk = yd[labels[1]] < thresh_trans
label_filter = np.where(labels[1])[0]
labels[1][label_filter[msk]] = False
labels[0][label_filter[msk]] = True
################################################################
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
gain = 0
else:
gain = 1
ym = y[lbl] - offset[pix[0], pix[1], cell, gain]
h, ex, ey = np.histogram2d(x[lbl], ym, range=((0, 600), (-500, 6000)), bins=(300, 650))
H[i] += h
fig = plt.figure(figsize=(10,7))
ax = fig.add_subplot(111)
ax.grid(lw=1.5)
for i in range(3):
H[i][H[i]==0] = np.nan
ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)", fontsize=12)
ax.set_xlabel("PC scan point (#)", fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()
```
%% Cell type:markdown id: tags:
### Examples from Pixel Subset ###
The following is a visualization of the clustering and fitting for a subset of pixels. If the data significantly mismatches expectations, the clustering and fitting algorithms should fail for this subset:
* the first plot shows the clustering results for pixels which were successfully evaluated
* the second plot shows the clustering results for pixels which failed to evaluate
* the third plot shows the fits and fit residuals for the pixel clusters shown in the first plot
Non-smooth behaviour is an indication that you are erroneously processing data as interleaved when it is not (or vice versa), or that the wrong number of memory cells is set.
We do this twice for different detector regions.
%% Cell type:code id: tags:
``` python
def find_params(x, y, yd, thresh_trans):
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
if labels[1].any():
bound = np.min(x[labels[1]])
bound_m = ms[1]
threshold = (np.mean(yd[labels[0]])+np.mean(yd[labels[2]])) / 2
#### Transition region gain is set based on dark thresholds. ####
msk = yd[labels[1]] < thresh_trans
label_filter = np.where(labels[1])[0]
labels[1][label_filter[msk]] = False
labels[0][label_filter[msk]] = True
    try:
        # step value above which dark thresholds are not considered
        if np.max(x[labels[0]]) > 220:
            labels[0][label_filter[msk]] = False
    except ValueError:
        print('Could not determine maximum value.')
###############################################################
    if bound is None or bound < 20:
        ya = yd  # digital signal for this pixel/cell, already filtered by vidx
msa, labels, centers = calc_m_cluster2(x, ya, 25, -10, 25)
if np.count_nonzero(labels[0]) > 0:
bound = np.min(x[labels[0]])
bound_m = ms[3]
else:
bound = 0
bound_m = 0
    if hg_range[1] > 0:
        hg_bound_high = hg_range[1]
        hg_bound_low = hg_range[0]
    else:
        hg_bound_high = bound - 20
        hg_bound_low = 0
    try:
        if fit_hook:
            mg_bound_high = len(x)
            try:
                mg_bound_low = np.max(x[labels[1]]) + 20
            except ValueError:  # raised if x[labels[1]] is empty
                mg_bound_low = bound + 100
        else:
            if mg_range[0] > 0:
                mg_bound_low = mg_range[0]
                mg_bound_high = mg_range[1]
            else:
                mg_bound_high = len(x)
                try:
                    mg_bound_low = np.max(x[labels[1]]) + 20
                except ValueError:  # raised if x[labels[1]] is empty
                    mg_bound_low = bound + 100
except Exception as e:
if "zero-size array" in str(e):
pass
else:
print(e)
return hg_bound_low, hg_bound_high, mg_bound_low, mg_bound_high, bound, bound_m, labels, threshold
```
%% Cell type:code id: tags:
``` python
def evaluate_fitting_ROI(modules, res, tpix_range, test_cells, roi):
markers = ['o', '.', 'x', 'v']
colors = ['tab:blue', 'tab:red', 'tab:green', 'k']
test_pixels = []
fig1, ax1 = plt.subplots(figsize=(9, 5))
ax1.grid(zorder=0, lw=1.5)
ax1.set_ylabel("PC pixel signal (ADU)", fontsize=11)
ax1.set_xlabel('step #', fontsize=11)
fig2, ax2 = plt.subplots(figsize=(9, 5))
ax2.grid(zorder=0, lw=1.5)
ax2.set_ylabel("PC gain signal (ADU)", fontsize=11)
ax2.set_xlabel('step #', fontsize=11)
fig3 = plt.figure(figsize=(9, 5))
gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
ax3 = plt.subplot(gs[0])
ax4 = plt.subplot(gs[1])
ax3.grid(zorder=0, lw=1.5)
ax3.set_ylabel('PC signal (keV)', fontsize=11)
ax4.set_ylabel('Relative\ndeviation', fontsize=11)
ax4.set_xlabel('step #', fontsize=11)
for i in range(*tpix_range[0]):
for j in range(*tpix_range[1]):
test_pixels.append((j,i))
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
for mod, r in zip(modules, res):
ana, dig, cellId = r
d = []
d2 = []
d3 = []
offset = offsets[mod]
noise = noises[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3, 1)
x = np.arange(ana.shape[0])
y = ana[:,cell, pix[0], pix[1]]
yd = dig[:,cell, pix[0], pix[1]]
thresh_trans = thresholds[mod][pix[0], pix[1], cell,0]
vidx = (y > 1000) & np.isfinite(y) & np.isfinite(x)
x = x[vidx]
y = y[vidx]
yd = yd[vidx]
hg_bound_low, hg_bound_high, mg_bound_low, mg_bound_high, bound, bound_m, labels, threshold = find_params(x, y, yd, thresh_trans)
for i, lbl in enumerate(labels):
if i == 0:
gain = 0
else:
gain = 1
ax1.plot(x[lbl], (y[lbl] - offset[pix[0], pix[1], cell, gain]), ls='None',
marker=markers[i], color=colors[i], alpha=0.3)
ax2.plot(x[lbl], yd[lbl], ls='None', marker=markers[i], color=colors[i], alpha=0.3)
ax2.plot(np.array([x[0], x[-1]]), np.ones(2) * threshold, lw=1, color='k', alpha=0.3)
# fit linear slope
idx = (x > hg_bound_low) & (x < hg_bound_high)
xl = x[idx]
yl = y[idx] - y[0]
if yl.shape[0] != 0:
parms = {'m': bound_m, 'b': np.min(yl)}
parms["limit_m"] = [0., 50.]
errors = np.ones(xl.shape) * noise[pix[0], pix[1], cell, 0]
fitted = fit_data(lin_fun, xl, yl, errors , parms)
hg_slope = fitted['m']
pc_high_l = fitted['b']
yf = lin_fun(xl, hg_slope, pc_high_l)
ax3.plot(xl, (yl / FF_gain), color='tab:blue', ls='None',alpha=0.5, marker='o')
ax3.plot(xl, (yf / FF_gain), color='navy', lw=1, zorder=9)
ax4.plot(xl, ((yl-yf) / yl), lw=1, color='navy')
# fit hook slope
try:
if fit_hook:
idx = (x >= mg_bound_low) & (x < mg_bound_high)
xh = x[idx]
yh = y[idx] - offset[pix[0], pix[1], cell, 1]
parms = {'m': bound_m/10 if bound_m/10>0.3 else 0.5,
'b': np.min(yh[yh > -2000]),
'a': np.max(yh), 'c': 5, 'o': bound-1}
parms["limit_m"] = [0., 2.0]
parms["limit_c"] = [1., 1000]
errors = np.ones(xh.shape) * noise[pix[0], pix[1], cell, 1]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
mg_slope = fitted['m']
pc_med_l = fitted['b']
slope_ratio = hg_slope / mg_slope
md_additional_offset = pc_high_l - pc_med_l * slope_ratio
yf = lin_fun(xh, mg_slope * slope_ratio, pc_med_l)
corr_yh = yh * slope_ratio + md_additional_offset
max_devh = np.median(np.abs((corr_yh - yf) / corr_yh))
else:
idx = (x >= mg_bound_low) & (x < mg_bound_high)
xh = x[idx]
yh = y[idx] - offset[pix[0], pix[1], cell, 1]
if len(yh[yh > -2000]) == 0:
break
parms = {'m': bound_m/10 if bound_m/10>0.3 else 0.5,
'b': np.min(yh[yh > -2000]),
}
parms["limit_m"] = [0., 2.0]
errors = np.ones(xh.shape) * noise[pix[0], pix[1], cell, 1]
fitted = fit_data(lin_fun, xh, yh, errors, parms)
mg_slope = fitted['m']
pc_med_l = fitted['b']
slope_ratio = hg_slope / mg_slope
md_additional_offset = pc_high_l - pc_med_l * slope_ratio
yf = lin_fun(xh, mg_slope * slope_ratio, pc_med_l)
corr_yh = yh * slope_ratio + md_additional_offset
max_devh = np.median(np.abs((corr_yh - yf) / corr_yh))
ax3.plot(xh, corr_yh/FF_gain, color='tab:green', ls='None', alpha=0.3, marker='x')
ax3.plot(xh, yf/FF_gain, color='green', lw=1, zorder=10)
ax4.plot(xh, ((corr_yh-yf)/corr_yh), lw=1, color='green')
except Exception as e:
if "zero-size array" in str(e):
pass
else:
print(e)
fig1.show()
fig2.show()
fig3.show()
if save_plots:
fig1.savefig("{}/module_{}_{}_pixel_plot.png".format(out_folder, mod, roi))
fig2.savefig("{}/module_{}_{}_pixel_plot_gain.png".format(out_folder, mod, roi))
fig3.savefig("{}/module_{}_{}_pixel_plot_fits.png".format(out_folder, mod, roi))
```
%% Cell type:markdown id: tags:
## ROI 1
%% Cell type:code id: tags:
``` python
tpix_range1 = [(250,254), (60,63)]
test_cells = [4, 38]
roi = 'ROI1'
evaluate_fitting_ROI(modules, res, tpix_range1, test_cells, roi)
```
%% Cell type:markdown id: tags:
## ROI 2
%% Cell type:code id: tags:
``` python
tpix_range2 = [(96,128), (32,64)]
roi2 = 'ROI2'
evaluate_fitting_ROI(modules, res, tpix_range2, test_cells, roi2)
```
%% Cell type:code id: tags:
``` python
# Here we perform the calculations in column-parallel for all modules
def calibrate_single_row(cells, fit_hook, ranges, inp):
hg_range = ranges[0]
mg_range = ranges[1]
import numpy as np
from iminuit import Minuit
from iminuit.util import describe, make_func_code
from sklearn.cluster import KMeans
yra, yrd, offset, noise, thresh_trans = inp
def rolling_window(a, window):
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
strides = a.strides + (a.strides[-1],)
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs / xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
reg1 = m > r1
reg2 = m < r2
reg3 = (m > r2) & (m < r3)
reg4 = ~(reg1 | reg2 | reg3)
labels = [reg1, reg2, reg3, reg4]
regions = np.zeros_like(x, np.uint8)
for r, lbl in enumerate(labels):
regions[lbl] = r
scan_range = 30
mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
# change from np.nan to -1
regions[...] = -1
regions[scan_range//2:-scan_range//2+1] = mregions
labels = [regions == 0, regions == 1, regions == 2, regions == 3]
idx = np.arange(x.size)
maxlbl = x.size-1
for i in range(0, len(labels)-1):
nidx = labels[i+1]
if np.any(nidx):
maxlbl = np.max(idx[nidx])
cidx = idx > maxlbl
if np.any(cidx):
labels[i][cidx] = False
ms = []
for lbl in labels:
xl = x[lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, labels, None
def fit_data(fun, x, y, yerr, par_ests):
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 0
f_sig = describe(fun)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x
self.y = y
self.err = err
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
wrapped = _Chi2Functor(fun, x, y, yerr)
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
return m.values
def lin_fun(x, m, b):
return m*x+b
def hook_fun(x, a, c, o, m, b):
return a*np.exp(-(x-o)/c)+m*x+b
# linear slope
ml = np.full(yra.shape[1:], np.nan)
bl = np.full(yra.shape[1:], np.nan)
devl = np.full(yra.shape[1:], np.nan)
#hook function
mh = np.full(yra.shape[1:], np.nan)
bh = np.full(yra.shape[1:], np.nan)
ch = np.full(yra.shape[1:], np.nan)
oh = np.full(yra.shape[1:], np.nan)
ah = np.full(yra.shape[1:], np.nan)
devh = np.full(yra.shape[1:], np.nan)
dhm = np.full(yra.shape[1:], np.nan)
# threshold
thresh = np.full(list(yrd.shape[1:])+[3,], np.nan)
failures = []
for col in range(yra.shape[-1]):
try:
y = yra[:,col]
x = np.arange(y.shape[0])
yd = yrd[:,col]
vidx = (y > 1000) & np.isfinite(y) & np.isfinite(x)
x = x[vidx]
y = y[vidx]
yd = yd[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = np.min(x[labels[1]])
bound_m = ms[1]
threshold = (np.mean(yd[labels[0]])+np.mean(yd[labels[2]])) / 2
thresh[col,0] = threshold
thresh[col,1] = np.mean(y[labels[0]])
thresh[col,2] = np.mean(y[labels[2]])
# fit linear slope
            if hg_range[1] > 0:
                hg_bound_high = hg_range[1]
                hg_bound_low = hg_range[0]
            else:
                hg_bound_high = bound - 20
                hg_bound_low = 0
            if fit_hook:
                mg_bound_high = len(x)
                try:
                    mg_bound_low = np.max(x[labels[1]]) + 20
                except ValueError:  # raised if x[labels[1]] is empty
                    mg_bound_low = bound + 100
            else:
                if mg_range[0] > 0:
                    mg_bound_low = mg_range[0]
                    mg_bound_high = mg_range[1]
                else:
                    mg_bound_high = len(x)
                    try:
                        mg_bound_low = np.max(x[labels[1]]) + 20
                    except ValueError:  # raised if x[labels[1]] is empty
                        mg_bound_low = bound + 100
idx = (x > hg_bound_low) & (x < hg_bound_high)
xl = x[idx]
yl = y[idx] - y[0]  # instead of subtracting the offset, it is better to subtract the first PC value here
errors = np.ones(xl.shape) * noise[col, 0]
if yl.shape[0] != 0:
parms = {'m': bound_m, 'b': np.min(yl)}
parms["limit_m"] = [0., 50.]
fitted = fit_data(lin_fun, xl, yl, errors, parms)
hg_slope = fitted['m']
pc_high_l = fitted['b']
yf = lin_fun(xl, hg_slope, pc_high_l)
max_devl = np.median(np.abs((yl-yf) / yl))
ml[col] = hg_slope #fitted['m']
bl[col] = pc_high_l #fitted['b']
devl[col] = max_devl
dhml = lin_fun(bound, hg_slope, pc_high_l)
# fit hook slope
if fit_hook:
idx = (x >= mg_bound_low) & (x < mg_bound_high)
xh = x[idx]
yh = y[idx] - offset[col, 1]
errors = np.ones(xh.shape) * noise[col, 1]
parms = {'m': bound_m/10 if bound_m/10 > 0.3 else 0.5, 'b': np.min(yh[yh > -2000]), 'a': np.max(yh), 'c': 5., 'o': bound-1}
parms["limit_m"] = [0., 2.0]
parms["limit_c"] = [1., 1000]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
mg_slope = fitted['m']
pc_med_l = fitted['b']
slope_ratio = hg_slope / mg_slope
md_additional_offset = pc_high_l - pc_med_l * slope_ratio
yf = lin_fun(xh, mg_slope * slope_ratio, pc_med_l)
corr_yh = yh * slope_ratio + md_additional_offset
max_devh = np.median(np.abs((corr_yh - yf) / corr_yh))
mh[col] = mg_slope # fitted['m']
bh[col] = pc_med_l # fitted['b']
ah[col] = fitted['a']
oh[col] = fitted['o']
ch[col] = fitted['c']
devh[col] = max_devh
dhm[col] = bound
else :
idx = (x >= mg_bound_low) & (x < mg_bound_high)
xh = x[idx]
yh = y[idx] - offset[col, 1]
errors = np.ones(xh.shape)*noise[col, 1]
parms = {'m': bound_m/10 if bound_m/10 > 0.3 else 0.5,
'b': np.min(yh[yh > -2000]),
}
parms["limit_m"] = [0., 2.0]
fitted = fit_data(lin_fun, xh, yh, errors, parms)
mg_slope = fitted['m']
pc_med_l = fitted['b']
slope_ratio = hg_slope / mg_slope
md_additional_offset = pc_high_l - pc_med_l * slope_ratio
yf = lin_fun(xh, mg_slope * slope_ratio, pc_med_l)
corr_yh = yh * slope_ratio + md_additional_offset
max_devh = np.median(np.abs((corr_yh - yf) / corr_yh))
mh[col] = mg_slope # fitted['m']
bh[col] = pc_med_l # fitted['b']
ah[col] = 0
oh[col] = 1
ch[col] = 1
devh[col] = max_devh
dhm[col] = bound
except Exception as e:
print(e)
failures.append((col, str(e)))
del yrd
del yra
return thresh, (ml, bl, devl), (mh, bh, ah, oh, ch, devh), failures, dhm
```
%% Cell type:code id: tags:
``` python
start = datetime.now()
fres = {}
failures = []
for i, r in zip(modules, res):
offset = offsets[i]
noise = noises[i]
qm = module_index_to_qm(i)
dig, ana, cellId = r
thresh_trans = thresholds[i][..., 0]
# linear slope
ml = np.zeros(dig.shape[1:])
bl = np.zeros(dig.shape[1:])
devl = np.zeros(dig.shape[1:])
#hook function
mh = np.zeros(dig.shape[1:])
bh = np.zeros(dig.shape[1:])
ch = np.zeros(dig.shape[1:])
oh = np.zeros(dig.shape[1:])
ah = np.zeros(dig.shape[1:])
devh = np.zeros(dig.shape[1:])
dhma = np.zeros(dig.shape[1:])
# threshold
thresh = np.zeros(list(dig.shape[1:]))
thresh_bounds = np.zeros(list(dig.shape[1:])+[2,])
for cell in range(dig.shape[1]):
inp = []
for j in range(dig.shape[2]):
inp.append((dig[:,cell,j,:], ana[:,cell,j,:], offset[j,:,cell,:], noise[j,:,cell,:], thresh_trans[j,:,cell]))
p = partial(calibrate_single_row, mem_cells, fit_hook, [hg_range, mg_range])
frs = view.map_sync(p, inp)
for j, fr in enumerate(frs):
threshr, lin, hook, fails, dhm = fr
mlr, blr, devlr = lin
mhr, bhr, ahr, ohr, chro, devhr = hook
failures.append(fails)
ml[cell, j, :] = mlr
bl[cell, j, :] = blr
devl[cell, j, :] = devlr
mh[cell, j, :] = mhr
bh[cell, j, :] = bhr
oh[cell, j, :] = ohr
ch[cell,j,:] = chro
ah[cell, j, :] = ahr
devh[cell, j, :] = devhr
dhma[cell, j, :] = dhm
thresh[cell, j, ...] = threshr[..., 0]
thresh_bounds[cell, j, ...] = threshr[..., 1:]
fres[qm] = {'ml': ml,
'bl': bl,
'devl': devl,
'tresh': thresh,
'tresh_bounds': thresh_bounds,
'dhm': dhma}
fres[qm].update({
'mh': mh,
'bh': bh,
'oh': oh,
'ch': ch,
'ah': ah,
'devh': devh,
})
duration = datetime.now() - start
print('Duration of fitting: ', duration)
```
%% Cell type:markdown id: tags:
Results of the slope fitting from the PC runs are distinguished on axis 0 by index:
* 0: linear slope - m value
* 1: linear slope - b value
* 2: linear slope - deviation
* 3: hook function - m value
* 4: hook function - b value
* 5: hook function - o value
* 6: hook function - c value
* 7: hook function - a value
* 8: hook function - deviation
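%% Cell type:markdown id: tags:
For orientation, a short sketch of indexing such a stacked array (the name `slopes_pc`, the tiny dummy dimensions and the values are assumptions for illustration only):
%% Cell type:code id: tags:
``` python
# Hypothetical stacked array with the 9 parameter planes listed above,
# shaped (parameter, cells, rows, cols).
slopes_pc = np.random.rand(9, 4, 8, 8).astype(np.float32) + 0.5
hg_slope = slopes_pc[0]   # index 0: linear slope - m
mg_slope = slopes_pc[3]   # index 3: hook function - m
gain_ratio = hg_slope / mg_slope
print(gain_ratio.shape)
```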
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
from scipy.stats import median_abs_deviation as mad
bad_pixels = OrderedDict()
for qm, data in fres.items():
mask = np.zeros(data['ml'].shape, np.uint32)
mask[(data['tresh'][...,0] < 50) | (data['tresh'][...,0] > 8500)] |= BadPixels.CI_GAIN_OF_OF_THRESHOLD.value
mask[(data['devl'] == 0)] |= BadPixels.CI_LINEAR_DEVIATION.value
mask[(np.abs(data['devl']) > 0.5)] |= BadPixels.CI_LINEAR_DEVIATION.value
mask[(~np.isfinite(data['devl']))] |= BadPixels.CI_EVAL_ERROR.value
bad_pixels[qm] = mask
# high gain slope from pulse capacitor data
pc_high_m = data['ml']
pc_high_b = data['bl']
# medium gain slope from pulse capacitor data
pc_med_m = data['mh']
pc_med_b = data['bh']
fshape = pc_high_m.shape
# calculate median for slopes
pc_high_med = np.nanmedian(pc_high_m, axis=(1, 2))
pc_med_med = np.nanmedian(pc_med_m, axis=(1, 2))
# calculate median for intercepts:
pc_high_b_med = np.nanmedian(pc_high_b, axis=(1, 2))
pc_med_b_med = np.nanmedian(pc_med_b, axis=(1, 2))
# mad can only iterate on 1 axis
tshape = (fshape[0],fshape[1]*fshape[2])
# calculate MAD for slopes
pc_high_rms = mad(pc_high_m.reshape(tshape), axis=1, scale='normal', nan_policy='omit')
pc_med_rms = mad(pc_med_m.reshape(tshape), axis=1, scale='normal', nan_policy='omit')
# calculate MAD for intercepts:
pc_high_b_rms = mad(pc_high_b.reshape(tshape), axis=1, scale='normal', nan_policy='omit')
pc_med_b_rms = mad(pc_med_b.reshape(tshape), axis=1, scale='normal', nan_policy='omit')
# expand the arrays to match the size of the originals
pc_high_med = np.repeat(pc_high_med, fshape[1]*fshape[2]).reshape(fshape)
pc_med_med = np.repeat(pc_med_med, fshape[1]*fshape[2]).reshape(fshape)
pc_high_b_med = np.repeat(pc_high_b_med, fshape[1]*fshape[2]).reshape(fshape)
pc_med_b_med = np.repeat(pc_med_b_med, fshape[1]*fshape[2]).reshape(fshape)
pc_high_rms = np.repeat(pc_high_rms, fshape[1]*fshape[2]).reshape(fshape)
pc_med_rms = np.repeat(pc_med_rms, fshape[1]*fshape[2]).reshape(fshape)
pc_high_b_rms = np.repeat(pc_high_b_rms, fshape[1]*fshape[2]).reshape(fshape)
pc_med_b_rms = np.repeat(pc_med_b_rms, fshape[1]*fshape[2]).reshape(fshape)
# sanitize PC data
# replace vals outside range and nans with median.
bad_fits_high = (np.abs((pc_high_m - pc_high_med)/pc_high_rms) > sigma_dev_cut) |\
(np.abs((pc_high_b - pc_high_b_med)/pc_high_b_rms) > sigma_dev_cut) |\
(~np.isfinite(pc_high_m)) | (~np.isfinite(pc_high_b))
bad_fits_med = (np.abs((pc_med_m - pc_med_med)/pc_med_rms) > sigma_dev_cut) |\
(np.abs((pc_med_b - pc_med_b_med)/pc_med_b_rms) > sigma_dev_cut) |\
(~np.isfinite(pc_med_m)) | (~np.isfinite(pc_med_b))
def plot_cuts(a, sel, name, units, ax=None):
stats_cut = {}
stats = {}
if ax is None :
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(111)
stats_cut["Mean"] = np.nanmean(a[sel])
stats_cut["STD"] = np.nanstd(a[sel])
stats_cut["Median"] = np.nanmedian(a[sel])
stats_cut["Min"] = np.nanmin(a[sel])
stats_cut["Max"] = np.nanmax(a[sel])
stats["Mean"] = np.nanmean(a)
stats["STD"] = np.nanstd(a)
stats["Median"] = np.nanmedian(a)
stats["Min"] = np.nanmin(a)
stats["Max"] = np.nanmax(a)
m=np.nanmean(a)
s=np.nanstd(a)
bins = np.linspace(m-10*s,m+10*s,100)
ax.hist(a.ravel(),
bins = bins,
color='tab:red',
label='all fits',
)
ax.hist(a[sel].ravel(),
bins = bins,
color='tab:green',
label='good fits'
)
ax.grid(zorder=0)
ax.set_xlabel(units)
ax.set_yscale('log')
ax.set_title(name)
ax.legend()
def statistic(stat, colour, shift):
textstr = ""
for key in stat:
            try:
                textstr += '{0: <6}: {1: 7.2f}\n'.format(key, stat[key])
            except (TypeError, ValueError):
                pass
props = dict(boxstyle='round', alpha=0.65, color=colour,)
ax.text(0.05, 0.95-shift, textstr, transform=ax.transAxes, fontsize=10,
family='monospace', weight='book', stretch='expanded',
verticalalignment='top', bbox=props, zorder=2)
statistic(stats_cut, 'tab:green', 0)
statistic(stats, 'tab:red', 0.25)
fig = plt.figure(figsize=(15,15))
plot_cuts(pc_high_m,~(bad_fits_high | (mask>0)),'pc_high_m',"ADU/DAC",fig.add_subplot(221))
plot_cuts(pc_high_b,~(bad_fits_high | (mask>0)),'pc_high_b',"ADU",fig.add_subplot(222))
plot_cuts(pc_med_m,~(bad_fits_med | (mask>0)),'pc_med_m',"ADU/DAC",fig.add_subplot(223))
plot_cuts(pc_med_b,~(bad_fits_med | (mask>0)),'pc_med_b',"ADU",fig.add_subplot(224))
pc_high_m[bad_fits_high] = pc_high_med[bad_fits_high]
pc_high_b[bad_fits_high] = pc_high_b_med[bad_fits_high]
pc_med_m[bad_fits_med] = pc_med_med[bad_fits_med]
pc_med_b[bad_fits_med] = pc_med_b_med[bad_fits_med]
mask[bad_fits_high] |= BadPixels.CI_EVAL_ERROR.value
mask[bad_fits_med] |= BadPixels.CI_EVAL_ERROR.value
```
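%% Cell type:markdown id: tags:
The `sigma_dev_cut` above is applied against a MAD-based sigma estimate, which stays robust in the presence of gross outliers; a quick synthetic illustration:
%% Cell type:code id: tags:
``` python
# With scale='normal' the MAD estimates the Gaussian sigma (~2 here),
# while the plain standard deviation is ruined by a few extreme outliers.
vals = np.random.normal(10., 2., 10000)
vals[:10] = 1e6
print("std:", np.std(vals))
print("mad:", mad(vals, scale='normal'))
```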
%% Cell type:code id: tags:
``` python
if local_output:
ofile = "{}/agipd_pc_store_{}_{}_{}.h5".format(out_folder, "_".join([str(run) for run in runs]), modules[0], modules[-1])
store_file = h5py.File(ofile, "w")
for qm, r in fres.items():
for key, item in r.items():
store_file["/{}/{}/0/data".format(qm, key)] = item
store_file["/{}/{}/0/data".format(qm, "BadPixelsPC")] = bad_pixels[qm]
store_file.close()
```
%% Cell type:markdown id: tags:
## Overview Plots ##
Each of the following plots represents one of the fit parameters of a defined memory cell on a module:
For the linear function of the high gain region
$$y = mx + b$$
* ml denotes the $m$ parameter
* bl denotes the $b$ parameter
* devl denotes the absolute relative deviation from linearity.
For the linear function of the medium gain region
$$y = mx + b$$
* mh denotes the $m$ parameter
* bh denotes the $b$ parameter
* devh denotes the absolute relative deviation from linearity.
For the composite function (if selected) of the medium gain and transition region
$$y = A e^{-(x-O)/C} + mx + b$$
* oh denotes the $O$ parameter
* ch denotes the $C$ parameter
* mh denotes the $m$ parameter
* bh denotes the $b$ parameter
* devh denotes the absolute relative deviation from the linear part of the function.
Additionally, the thresholds and bad pixels (mask) are shown.
Finally, the red and white rectangles indicate the first and second pixel ranges.
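%% Cell type:markdown id: tags:
For reference, this is how the high- and medium-gain fit parameters combine into a relative gain correction in the fitting cells above (a sketch with made-up per-pixel values, mirroring the `slope_ratio` / `md_additional_offset` computation):
%% Cell type:code id: tags:
``` python
hg_slope, hg_intercept = 12.0, 300.0    # ml, bl for one pixel/cell (made up)
mg_slope, mg_intercept = 1.1, -2500.0   # mh, bh for the same pixel/cell
slope_ratio = hg_slope / mg_slope       # scales MG data onto the HG scale
md_additional_offset = hg_intercept - mg_intercept * slope_ratio
# An offset-corrected medium-gain sample adu_mg is expressed on the
# high-gain scale as: adu_mg * slope_ratio + md_additional_offset
adu_mg = 1000.0
print(adu_mg * slope_ratio + md_additional_offset)
```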
%% Cell type:code id: tags:
``` python
def preview_fitted_params(data, module, cell_to_preview, fit_hook):
plot_text= {"ml": 'HG slope (ml)',
"bl": 'HG intercept (bl)',
"devl": 'HG linearity deviation (devl)',
"tresh": 'Gain threshold (tresh)',
"tresh_bounds": 'Mean HG signal (tresh_bounds)',
"dhm": 'HG boundary (dhm)',
"mh": 'MG slope (mh)',
"bh": 'MG intercept (bh)',
"oh": 'Hook - o (oh)',
"ch": 'Hook - c (ch)',
"ah": 'Hook - a (ah)',
"devh": 'MG linearity deviation (devh)'
}
mask = bad_pixels[module]
fig = plt.figure(figsize=(20,20))
    # 7 parameter rows with the hook fit, 5 without (oh/ch/ah are skipped).
    nrows = 7 if fit_hook else 5
    grid = AxesGrid(fig, 111,
                    nrows_ncols=(nrows, 2),
                    axes_pad=(0.9, 0.55),
                    label_mode="0",
                    share_all=True,
                    cbar_location="right",
                    cbar_mode="each",
                    cbar_size="7%",
                    cbar_pad="2%"
                    )
i = 0
for key, citem in data.items():
item = citem.copy()
item[~np.isfinite(item)] = 0
med = np.nanmedian(item)
bound = 0.1
maxcnt = 10
if med < 0:
bound = -bound
while(np.count_nonzero((item < med-bound*med) | (item > med+bound*med))/item.size > 0.01):
bound *= 2
maxcnt -= 1
if maxcnt < 0:
break
if "bounds" in key:
d = item[cell_to_preview, ..., 0]
im = grid[i].imshow(d, interpolation="nearest", origin='lower',
vmin=med-bound*med, vmax=med+bound*med)
grid[i].set_title(plot_text[key], fontsize=14)
        else:
            # Hook-only parameters are skipped when the hook fit was not used.
            if not fit_hook and key in ('oh', 'ch', 'ah'):
                continue
            d = item[cell_to_preview, ...]
            im = grid[i].imshow(d, interpolation="nearest", origin='lower',
                                vmin=med-bound*med, vmax=med+bound*med)
            grid[i].set_title(plot_text[key], fontsize=14)
cb = grid.cbar_axes[i].colorbar(im)
# axes coordinates are 0,0 is bottom left and 1,1 is upper right
x0, x1 = tpix_range1[0][0], tpix_range1[0][1]
y0, y1 = tpix_range1[1][0], tpix_range1[1][1]
p = patches.Rectangle((x0, y0), (x1 - x0), (y1 - y0), fill=False, color="red")
grid[i].add_patch(p)
x0, x1 = tpix_range2[0][0], tpix_range2[0][1]
y0, y1 = tpix_range2[1][0], tpix_range2[1][1]
p = patches.Rectangle((x0, y0), (x1 - x0), (y1 - y0), fill=False, color="white")
grid[i].add_patch(p)
i += 1
im = grid[-1].imshow(mask[cell_to_preview, ...], interpolation="nearest", origin='lower', vmin=0, vmax=1)
cb = grid.cbar_axes[-1].colorbar(im)
grid[-1].set_title('Mask', fontsize=14)
```
%% Cell type:code id: tags:
``` python
cell_to_preview=[1,mem_cells-5]
for module, data in fres.items():
print(' Memory cell {}:'.format(cell_to_preview[0]))
preview_fitted_params(data, module, cell_to_preview[0], fit_hook)
```
%% Cell type:code id: tags:
``` python
for module, data in fres.items():
print('Memory cell {}:'.format(cell_to_preview[1]))
preview_fitted_params(data, module, cell_to_preview[1], fit_hook)
```
%% Cell type:markdown id: tags:
### Memory Cell dependent behavior of thresholding ###
%% Cell type:code id: tags:
``` python
toplot = {"tresh": "Gain threshold (ADU)",
          "ml": "Slope (HG)",
          "bl": "Offset (HG) (ADU)",
          "mh": "Slope (MG)",
          "bh": "Offset (MG) (ADU)",
          "mltomh": "Ratio slope_HG/slope_MG"}
fig, ax = plt.subplots(3, 2, figsize=(18, 15))
fig.subplots_adjust(wspace=0.1, hspace=0.15)
for module, data in fres.items():
mltomh = data['ml'] / data['mh']
fres[module].update({'mltomh': mltomh})
bins = 100
for counter, (typ, label) in enumerate(toplot.items()):
r_hist = np.zeros((mem_cells, bins))
mask = bad_pixels[module]
thresh = data[typ]
hrange = [0.5 * np.nanmedian(thresh), 1.5 * np.nanmedian(thresh)]
if hrange[1] < hrange[0]:
hrange = hrange[::-1]
for c in range(mem_cells):
d = thresh[c, ...]
h, e = np.histogram(d.flatten(), bins=bins, range=hrange)
r_hist[c, :] = h
if counter < 3:
im = ax[counter, 0].imshow(r_hist[:, :].T[::-1, :], interpolation="nearest",
aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(r_hist)),
extent=[0, mem_cells, hrange[0], hrange[1]])
ax[counter,0].set_xlabel("Memory cell", fontsize=12)
ax[counter,0].set_ylabel(label, fontsize=12)
cb = fig.colorbar(im, ax=ax[counter, 0], pad=0.01)
else:
im = ax[counter-3, 1].imshow(r_hist[:, :].T[::-1, :], interpolation="nearest",
aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(r_hist)),
extent=[0, mem_cells, hrange[0], hrange[1]])
ax[counter-3, 1].set_xlabel("Memory cell", fontsize=12)
ax[counter-3, 1].set_ylabel(label, fontsize=12)
cb = fig.colorbar(im, ax=ax[counter-3, 1], pad=0.01)
cb.set_label("Counts", fontsize=12)
```
%% Cell type:markdown id: tags:
## Global Bad Pixel Behaviour ##
The following plots show the results of the bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2. This suppresses isolated single bad pixels. Hence, only bad pixels spanning at least two pixels in the x/y-plane, or at least two memory cells, are indicated. Colors encode the bad pixel type, or mixed type.
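%% Cell type:markdown id: tags:
The 2x2 rebinning is conceptually a logical OR over pixel blocks; the following standalone sketch illustrates the idea (it is not the `plot_badpix_3d` internals):
%% Cell type:code id: tags:
``` python
# Collapse 2x2 pixel blocks of a boolean bad-pixel mask with a logical OR,
# so a block is flagged whenever any pixel inside it is bad.
toy_mask = np.random.rand(128, 512) > 0.999
blocks = toy_mask.reshape(64, 2, 256, 2)
rebinned = blocks.any(axis=(1, 3))   # shape (64, 256)
print(rebinned.sum(), "flagged blocks")
```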
%% Cell type:code id: tags:
``` python
cols = {BadPixels.CI_GAIN_OF_OF_THRESHOLD.value: (BadPixels.CI_GAIN_OF_OF_THRESHOLD.name, '#FF000080'),
BadPixels.CI_EVAL_ERROR.value: (BadPixels.CI_EVAL_ERROR.name, '#0000FF80'),
BadPixels.CI_GAIN_OF_OF_THRESHOLD.value | BadPixels.OFFSET_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
rebin = 2 if not high_res_badpix_3d else 1
gain = 0
for mod, data in bad_pixels.items():
plot_badpix_3d(np.moveaxis(data, 0, 2), cols, title=mod, rebin_fac=rebin, azim=60.)
```
%% Cell type:code id: tags:
``` python
one_photon = 73 # ADU (10 keV photon)
test_pixels = []
tpix_range1 = [(0,8), (0,8)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38, 64, 128, 200, 249]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
from mpl_toolkits.axes_grid1 import ImageGrid
for mod, r in zip(modules, res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
H = [0, 0, 0, 0]
H2 = [0, 0, 0, 0]
Ha = [0, 0, 0, 0]
qm = module_index_to_qm(mod)
cdata = fres[qm]
ex, ey, ea = None, None, None
medml = np.nanmean(cdata['ml'])
medmh = np.nanmean(cdata['mh'][cdata['mh']> 0.5])
offset = offsets[mod]
threshold = thresholds[mod]
medth = np.nanmean(threshold[..., 0])
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3, 1)
x = np.arange(dig.shape[0])
y = dig[:, cell, pix[0], pix[1]]
a = ana[:, cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
a = a[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
ymin = y.min()
amin = a[labels[2]].min()
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)
o = offset[pix[0], pix[1], cell, 0]
ym = (y[lbl]-o)/cm
elif i >= 1:
mh = cdata['mh'][cell, pix[0], pix[1]]
ml = cdata['ml'][cell, pix[0], pix[1]]
cml = ml / medml
cmh = mh / medmh
cm = medml / medmh
oh = cdata['bh'][cell, pix[0], pix[1]]
o = offset[pix[0], pix[1], cell, 1] + oh
ym = (y[lbl]-o) / cmh * cm
if i == 1:
ah = cdata['ah'][cell, pix[0], pix[1]]
ch = cdata['ch'][cell, pix[0], pix[1]]
ohh = cdata['oh'][cell, pix[0], pix[1]]
tx = ch * np.log(ah/(y[lbl]-o))+ohh
chook = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
ym -= chook
h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
H[i] += h
labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)
o = offset[pix[0], pix[1], cell, 0]
ym = (y[lbl]-o) / cm
elif i >= 1:
mh = cdata['mh'][cell, pix[0], pix[1]]
ml = cdata['ml'][cell, pix[0], pix[1]]
cml = ml/medml
cmh = mh/medmh
cm = medml/medmh
oh = cdata['bh'][cell, pix[0], pix[1]]
o = offset[pix[0], pix[1], cell, 1] + oh
ym = (y[lbl]-o) / cmh * cm
if i == 1:
ah = cdata['ah'][cell, pix[0], pix[1]]
ch = cdata['ch'][cell, pix[0], pix[1]]
ohh = cdata['oh'][cell, pix[0], pix[1]]
tx = ch*np.log(ah/(y[lbl]-o)) + ohh
chook = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
idx = (a[lbl]-amin) < 0
ym[idx] -= chook[idx]
h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
H2[i] += h
labels = [a < threshold[pix[0], pix[1], cell, 0], a >= threshold[pix[0], pix[1], cell, 0]]
for i, lbl in enumerate(labels):
if np.any(lbl):
am = a[lbl] - amin
h, ex, ea = np.histogram2d(x[lbl], am, range=((0, 600), (-100, 5000)), bins=(300, 400))
Ha[i] += h
fig = plt.figure(figsize=(10,15))
ax = fig.add_subplot(311)
for i in range(3):
H[i][H[i]==0] = np.nan
ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (# photon)")
ax.set_xlabel("PC scan point (#)")
x = np.arange(0, 600)
ideal = medml * x / one_photon
ax.plot(x, ideal, color='red')
ax.plot(x, ideal + np.sqrt(ideal), color='red')
ax.plot(x, ideal - np.sqrt(ideal), color='red')
ax.grid(lw=1.5)
ax = fig.add_subplot(312)
for i in range(2):
H2[i][H2[i]==0] = np.nan
ax.imshow(H2[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H2[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (# photon)")
ax.set_xlabel("PC scan point (#)")
x = np.arange(0, 600)
ideal = medml * x / one_photon
ax.plot(x, ideal, color='red')
ax.plot(x, ideal + np.sqrt(ideal), color='red')
ax.plot(x, ideal - np.sqrt(ideal), color='red')
ax.grid(lw=1.5)
ax = fig.add_subplot(313)
for i in range(2):
Ha[i][Ha[i]==0] = np.nan
ax.imshow(Ha[0].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(Ha[1].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD gain (ADU)")
ax.set_xlabel("PC scan point (#)")
ax.grid(lw=1.5)
```
......
%% Cell type:markdown id: tags:
# Pulse Capacitor Characterisation Summary
This notebook is used as a dependency notebook for the pulse capacitor characterisation and provides a summary for all modules of the AGIPD.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/d/raw/SPB/202331/p900376/" # path to input data, required
out_folder = "/gpfs/exfel/exp/SPB/202331/p900376/usr/PC/temp_test/12on/" # path to output to, required
in_folder = "/gpfs/exfel/exp/HED/202430/p900438/raw/" # path to input data, required
out_folder = "/gpfs/exfel/exp/HED/202430/p900438/scratch/rovensky/pc/" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
runs = [] # runs to use, required, range allowed
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
karabo_id = "SPB_DET_AGIPD1M-1"
karabo_id_control = "HED_DET_AGIPD65K1" # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
karabo_id = "HED_DET_AGIPD65K1"
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
cal_db_timeout = 3000000 # timeout on caldb requests
cal_db_interface = "tcp://max-exfl-cal001:8015#8045"
db_output = False
bias_voltage = -1 # detector bias voltage, negative values for auto-detection.
mem_cells = -1 # number of memory cells used, negative values for auto-detection.
acq_rate = -1. # the detector acquisition rate, negative values for auto-detection.
gain_setting = -1 # gain setting can have value 0 or 1, negative values for auto-detection.
integration_time = -1 # integration time, negative values for auto-detection.
```
%% Cell type:code id: tags:
``` python
import warnings
warnings.filterwarnings('ignore')
import os
from pathlib import Path
from dateutil import parser
from datetime import timedelta
import h5py
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.cm as cm
import numpy as np
import tabulate
import multiprocessing
from cal_tools.agipdlib import AgipdCtrl
from cal_tools.ana_tools import get_range
from cal_tools.tools import (
calcat_creation_time,
module_index_to_qm,
get_from_db,
get_pdu_from_db,
get_report,
send_to_db
)
from cal_tools.plotting import agipd_single_module_geometry
from extra_data import RunDirectory
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from IPython.display import Latex, display
from XFELDetAna.plotting.simpleplot import simplePlot
from iCalibrationDB import Conditions, Constants
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = calcat_creation_time(in_folder, runs[0], creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}\n")
# Get operation conditions
ctrl_source = ctrl_source_template.format(karabo_id_control)
run_folder = f'{in_folder}/r{runs[0]:04d}/'
raw_dc = RunDirectory(run_folder)
# Read operating conditions from AGIPD00 files
instrument_src_mod = list(raw_dc.detector_sources)[0]
agipd_cond = AgipdCtrl(
run_dc=raw_dc,
image_src=instrument_src_mod,
    ctrl_src=ctrl_source,
    raise_error=False,  # to be able to process very old data that lack some control values
)
if mem_cells == -1:
mem_cells = agipd_cond.get_num_cells()
if mem_cells is None:
raise ValueError(f"No raw images found in {run_folder}")
if acq_rate == -1.:
acq_rate = agipd_cond.get_acq_rate()
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == -1:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "HED":
nmods = 8
else:
nmods = 16
instance = karabo_id_control.split("_")[-1]
agipd_instances = {'AGIPD65K1': [1, agipd_single_module_geometry()],
'AGIPD500K2G': [8, AGIPD_500K2GGeometry.from_origin()],
'AGIPD1M1': [16, AGIPD_1MGeometry.from_quad_positions(quad_pos=[(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])]
}
try:
nmods = agipd_instances[instance][0]
geom = agipd_instances[instance][1]
except KeyError as ke:
    print('The provided AGIPD instance is not recognised. '
          'Available instances are:\nAGIPD65K1,\nAGIPD500K2G,\nAGIPD1M1')
print(f"Using {creation_time} as creation time\n")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Integration time: {integration_time}\n"
)
```
%% Cell type:code id: tags:
``` python
# ml - high gain slope
# bl - high gain intercept
# devl - absolute relative deviation from linearity for high gain
# mh - medium gain slope
# bh - medium gain intercept
# oh, ch, ah - parameters of hook function fit to medium gain (only if requested)
# devh - absolute relative deviation from linearity for linear part of medium gain
keys = ['ml', 'bl', 'mh', 'bh', 'BadPixelsPC']
keys_file = ["ml", "bl", "devl", "mh", "bh", "oh", "ch", "ah", "devh"]
fit_data = {}
bad_pixels = {}
modules = []
karabo_da = []
constants_files = []
out_folder = Path(out_folder)
constants_files = sorted(out_folder.glob('*.h5'),
                         key=lambda f: (len(f.stem), f.stem))
for f in constants_files:
    mod = int(f.stem.split("_")[-1])
    qm = module_index_to_qm(mod)
    fit_data[mod] = {}
    with h5py.File(f, 'r') as hf:
        bad_pixels[mod] = hf[f'/{qm}/BadPixelsPC/0/data'][()]
        for key in keys_file:
            fit_data[mod][key] = hf[f'/{qm}/{key}/0/data'][()]
    modules.append(mod)
    karabo_da.append(f"AGIPD{mod:02d}")
    print(f'Data available for AGIPD{mod:02d}')
```
%% Cell type:code id: tags:
``` python
def slope_dict_to_arr(d):
key_to_index = {
"ml": 0,
"bl": 1,
"devl": 2,
"mh": 3,
"bh": 4,
"oh": 5,
"ch": 6,
"ah": 7,
"devh": 8,
"tresh": 9
}
arr = np.zeros([11]+list(d["ml"].shape), np.float32)
for key, item in d.items():
if key not in key_to_index:
continue
arr[key_to_index[key],...] = item
return arr
```
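%% Cell type:markdown id: tags:
A small round-trip check of `slope_dict_to_arr` (assuming the fit data of at least one module were loaded above):
%% Cell type:code id: tags:
``` python
arr = slope_dict_to_arr(fit_data[modules[0]])
# Index 0 holds the high-gain slope 'ml'; NaNs are treated as equal.
assert np.allclose(arr[0], fit_data[modules[0]]['ml'], equal_nan=True)
print(arr.shape)
```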
%% Cell type:code id: tags:
``` python
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
integration_time=integration_time)
db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesPC(),
condition, cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))
report = get_report(metadata_folder)
```
%% Cell type:code id: tags:
``` python
md = None
if db_output:
for mod, pdu in zip(modules, db_modules):
for const in ["SlopesPC", "BadPixelsPC"]:
dbconst = getattr(Constants.AGIPD, const)()
if const == "SlopesPC":
dbconst.data = slope_dict_to_arr(fit_data[mod])
else:
dbconst.data = bad_pixels[mod]
md = send_to_db(pdu, karabo_id, dbconst, condition,
file_loc, report, cal_db_interface,
creation_time=creation_time,
variant=1)
print("Constants injected with the following conditions:\n")
print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• integration_time: {integration_time}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
else:
print('Injection to DB not requested.')
```
%% Cell type:code id: tags:
``` python
# Remove keys which won't be used for comparison plots and add BP to the rest of data
for mod in modules:
fit_data[mod]['BadPixelsPC'] = bad_pixels[mod]
for key in keys_file:
if key not in keys:
del fit_data[mod][key]
for key in keys:
fit_data[mod][key] = fit_data[mod][key].swapaxes(1,2)
```
%% Cell type:code id: tags:
``` python
def retrieve_old_PC(mod):
dconst = getattr(Constants.AGIPD, 'SlopesPC')()
old_PC = get_from_db(karabo_id=karabo_id,
karabo_da=karabo_da[mod],
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout)
return old_PC
with multiprocessing.Pool(processes=len(modules)) as pool:
old_PC_consts = pool.map(retrieve_old_PC, range(len(modules)))
```
%% Cell type:code id: tags:
``` python
# Create the arrays that will be used for figures.
# Each array corresponds to the data for all processed modules.
pixel_range = [0,0,512,128]
const_data = {}
old_const = {}
const_order = [0, 1, 3, 4]
for (key, c) in zip(keys, const_order):
const_data[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
old_const[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
for cnt, mn in enumerate(modules):
    if key in fit_data[mn]:
        const_data[key][cnt, :, pixel_range[0]:pixel_range[2],
                        pixel_range[1]:pixel_range[3]] = fit_data[mn][key]
        if np.any(old_PC_consts[0][0]):
            old_const[key][cnt, :, pixel_range[0]:pixel_range[2],
                           pixel_range[1]:pixel_range[3]] = old_PC_consts[cnt][0][c].swapaxes(1, 2)
const_data['BadPixelsPC'] = np.full((nmods, mem_cells, 512, 128), np.nan)
for cnt, mn in enumerate(modules):
    const_data['BadPixelsPC'][cnt, :, pixel_range[0]:pixel_range[2],
                              pixel_range[1]:pixel_range[3]] = fit_data[mn]['BadPixelsPC']
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
%% Cell type:code id: tags:
``` python
gain_data = {'HG': {},
'MG': {},
'BP': {}
}
old_gain_data = {'HG': {},
'MG': {}
}
for key in ['ml', 'bl']:
gain_data['HG'][key] = const_data[key]
old_gain_data['HG'][key] = old_const[key]
for key in ['mh', 'bh']:
gain_data['MG'][key] = const_data[key]
old_gain_data['MG'][key] = old_const[key]
gain_data['BP']['BadPixelsPC'] = const_data['BadPixelsPC']
```
%% Cell type:code id: tags:
``` python
def plot_definition(data, g, key):
titles = ['Difference to previous', 'Percentage difference']
gs = gridspec.GridSpec(1, 2)
fig = plt.figure(figsize=(15, 7))
plt.suptitle(f'{g}', fontsize=16)
for pos in range(0,2):
vmin, vmax = get_range(data[pos], 2)
vmax = max(vmax, abs(vmin))
ax = fig.add_subplot(gs[0, pos])
ticks = [-1, 0, 1] if np.isnan(vmin) else [vmin, (vmin+vmax)/2, vmax]
geom.plot_data_fast(data[pos],
vmin=vmin, vmax=vmax, ax=ax, cmap="RdBu", figsize=(13,7),
colorbar={'shrink': 1,
'pad': 0.04,
'fraction': 0.1,
})
colorbar = ax.images[0].colorbar
colorbar.ax.set_yticklabels(["{:.1f}".format(tk) for tk in colorbar.get_ticks()])
if pos == 1:
colorbar.set_label('%')
ax.set_title(f"{titel[pos]}: {key}", fontsize=14)
ax.set_xlabel("Columns", fontsize=13)
ax.set_ylabel("Rows", fontsize=13)
def plot_diff_consts(old_const, new_const, g, ratio=False):
if ratio:
old_data = old_const['HG']['ml'] / old_const['MG']['mh']
new_data = new_const['HG']['ml'] / new_const['MG']['mh']
data1 = np.nanmean((new_data - old_data), axis=1)
data2 = np.nanmean((new_data - old_data)/old_data*100, axis=1)
data = [data1, data2]
key ='Slopes ratio HG/MG'
plot_definition(data, g, key)
else:
for i, key in enumerate(old_const[g].keys()):
data1 = np.nanmean((new_const[g][key] - old_const[g][key]), axis=1)
data2 = np.nanmean((new_const[g][key] - old_const[g][key])/old_const[g][key]*100, axis=1)
data = [data1, data2]
plot_definition(data, g, key)
```
%% Cell type:code id: tags:
``` python
for gain in old_gain_data.keys():
plot_diff_consts(old_gain_data, gain_data, gain)
```
%% Cell type:code id: tags:
``` python
plot_diff_consts(old_gain_data, gain_data, 'Ratio HG/MG', ratio=True)
```
%% Cell type:code id: tags:
``` python
g_label = ['High gain', 'Medium gain', 'Bad pixels PC']
for idx, g in enumerate(gain_data.keys()):
gs = gridspec.GridSpec(1, 2)
fig = plt.figure(figsize=(15, 7))
plt.suptitle(f'{g_label[idx]}', fontsize=16)
for i, key in enumerate(gain_data[g].keys()):
if key == 'BadPixelsPC':
data = np.nanmean(gain_data[g][key]>0, axis=1)
vmin, vmax = (0,1)
ax = fig.add_subplot(gs[0, :])
ticks = [0, 0.5, 1]
else:
data = np.nanmean(gain_data[g][key], axis=1)
vmin, vmax = get_range(data, 5)
ax = fig.add_subplot(gs[0, i])
geom.plot_data_fast(data,
vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(13,7),
colorbar={'shrink': 1,
'pad': 0.04,
'fraction': 0.1,
})
colorbar = ax.images[0].colorbar
ax.set_title(key, fontsize=14)
ax.set_xlabel('Columns', fontsize=13)
ax.set_ylabel('Rows', fontsize=13)
plt.show()
```
%% Cell type:markdown id: tags:
## Summary across cells ##
Good pixels only.
%% Cell type:code id: tags:
``` python
ratio = gain_data['HG']['ml'] / gain_data['MG']['mh']
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111)
data = np.nanmean(ratio, axis=1)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data,
vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(6,7),
colorbar={'shrink': 1,
'pad': 0.04,
'fraction': 0.1
})
colorbar = ax.images[0].colorbar
colorbar.set_label('HG slope / MG slope', fontsize=13)
ax.set_title('High/Medium Gain Slope Ratio', fontsize=14)
ax.set_xlabel('Columns', fontsize=13)
ax.set_ylabel('Rows', fontsize=13)
plt.show()
```
%% Cell type:code id: tags:
``` python
for idx, g in enumerate(gain_data.keys()):
for key in gain_data[g].keys():
data = np.copy(gain_data[g][key])
if key=='BadPixelsPC':
data = data>0
else:
data[gain_data['BP']['BadPixelsPC']>0] = np.nan
d = []
for i in range(nmods):
d.append({'x': np.arange(data[i].shape[0]),
'y': np.nanmean(data[i], axis=(1,2)),
'drawstyle': 'steps-pre',
'label': f'{modules[i]}',
'linewidth': 2,
'linestyle': '--' if i>7 else '-'
})
fig = plt.figure(figsize=(12, 6))
plt.suptitle(f'{g_label[idx]} - {key}', fontsize=16)
ax = fig.add_subplot(111)
_ = simplePlot(d, xrange=(-12, 510),
x_label='Memory Cell ID',
y_label=key,
use_axis=ax,
legend='top-left-frame-ncol8',)
ylim = ax.get_ylim()
ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
ax.grid()
```
%% Cell type:code id: tags:
``` python
d = []
for i in range(nmods):
d.append({'x': np.arange(ratio[i].shape[0]),
'y': np.nanmedian(ratio[i], axis=(1,2)),
'drawstyle': 'steps-pre',
'label': f'{modules[i]}',
'linewidth': 2,
'linestyle': '--' if i>7 else '-'
})
fig = plt.figure(figsize=(12, 6))
plt.suptitle('High/Medium Gain Slope Ratio', fontsize=16)
ax = fig.add_subplot(111)
_ = simplePlot(d, xrange=(-12, 510),
x_label='Memory Cell ID',
y_label='Gain ratio ml/mh',
use_axis=ax,
legend='top-left-frame-ncol8',)
ylim = ax.get_ylim()
ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
ax.grid()
```
%% Cell type:code id: tags:
``` python
table = []
ratio_old = old_gain_data['HG']['ml'] / old_gain_data['MG']['mh']
for cnt, mod in enumerate(modules):
    table.append((mod,
                  f"{np.nanmedian(ratio[cnt]):0.1f} +- {np.nanstd(ratio[cnt]):0.2f}",
                  f"{np.nanmedian(ratio_old[cnt]):0.1f} +- {np.nanstd(ratio_old[cnt]):0.2f}",
                  f"{np.nanmean(gain_data['BP']['BadPixelsPC'][cnt] > 0)*100:0.1f} ({np.nansum(gain_data['BP']['BadPixelsPC'][cnt] > 0)})"
                  ))
all_HM = []
all_HM_old = []
for mod in range(len(modules)):
all_HM.extend(ratio[mod])
all_HM_old.extend(ratio_old[mod])
all_HM = np.array(all_HM)
all_HM_old = np.array(all_HM_old)
all_MSK = np.array([list(msk) for msk in gain_data['BP']['BadPixelsPC']])
table.append(('overall',
f"{np.nanmean(all_HM):0.1f} +- {np.nanstd(all_HM):0.2f}",
f"{np.nanmean(all_HM_old):0.1f} +- {np.nanstd(all_HM_old):0.2f}",
f"{np.nanmean(all_MSK>0)*100:0.1f} ({np.nansum(all_MSK>0)})"
))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Module",
"HG/MG Ratio",
"Previous HG/MG Ratio",
"Bad pixels [%(Count)]"])))
```
%% Cell type:markdown id: tags:
## Summary high-medium gain ratio (good pixels only) + histograms
%% Cell type:code id: tags:
``` python
colors = cm.rainbow(np.linspace(0, 1, nmods))
gs = gridspec.GridSpec(1, 2)
fig = plt.figure(figsize=(17, 8))
ratio[gain_data['BP']['BadPixelsPC'] > 0] = np.nan
data = np.nanmean(ratio, axis=1)
vmin, vmax = get_range(data, 5)
ax = fig.add_subplot(gs[0, 0])
geom.plot_data_fast(data,
vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12.5,7),
colorbar={'shrink': 1,
'pad': 0.04,
'fraction': 0.1
})
colorbar = ax.images[0].colorbar
colorbar.set_label('HG/MG', fontsize=12)
ax.set_xlabel('Columns', fontsize=12)
ax.set_ylabel('Rows', fontsize=12)
ax = fig.add_subplot(gs[0,1])
for cnt, mod in enumerate(modules):
    h, e = np.histogram(ratio[cnt].flatten(), bins=100, range=(vmin, vmax))
    ax.plot(e[:-1], h, color=colors[cnt], linewidth=2, label=f'{mod}', alpha=0.8)
ax.set_xlabel('High/Medium Gain Ratio', fontsize=13)
ax.set_ylabel('Counts', fontsize=13)
plt.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
ax.grid()
ax.legend()
plt.show()
```
@@ -58,7 +58,7 @@ install_requires = [
"markupsafe==2.0.1",
"astcheck==0.3.0",
"cfelpyutils==2.0.6",
"calibration_client==11.2.0",
"calibration_client==11.3.0",
"dill==0.3.0",
"docutils==0.17.1",
"dynaconf==3.1.4",
@@ -2,6 +2,7 @@
from datetime import date, datetime, time, timezone
from functools import lru_cache
from pathlib import Path
from typing import Optional, Sequence
from weakref import WeakKeyDictionary
import h5py
@@ -113,8 +114,7 @@ class CalCatApi(metaclass=ClientWrapper):
"""Encode operating condition to CalCat API format.
Args:
condition (dict): Mapping of parameter DB name to value
Returns:
(dict) Operating condition for use in CalCat API.
@@ -192,6 +192,19 @@ class CalCatApi(metaclass=ClientWrapper):
return resp_calibration["data"]["id"]
@lru_cache()
def calibration_name(self, calibration_id):
"""Name for a calibration in CalCat."""
resp_calibration = Calibration.get_by_id(
self.client, calibration_id
)
if not resp_calibration["success"]:
raise CalCatError(resp_calibration)
return resp_calibration["data"]["name"]
@lru_cache()
def parameter_id(self, param_name):
"""ID for an operating condition parameter in CalCat."""
@@ -203,6 +216,33 @@ class CalCatApi(metaclass=ClientWrapper):
return resp_parameter["data"]["id"]
def _closest_ccv_by_time_by_condition(
self,
detector_name: str,
calibration_ids: Sequence[int],
condition: dict,
karabo_da: Optional[str] = None,
event_at=None,
pdu_snapshot_at=None,
):
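"""Query the closest CCVs by time for several calibrations and one condition.
Shared helper behind the public closest-CCV lookup methods: it returns the
raw list of CCV records from CalCat, or an empty list when nothing matches.
"""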
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
self.client,
detector_name,
calibration_ids,
self.format_cond(condition),
karabo_da=karabo_da or "",
event_at=self.format_time(event_at),
pdu_snapshot_at=self.format_time(pdu_snapshot_at),
)
if not resp["success"]:
if resp["status_code"] == 200:
# calibration_client turns empty response into an error
return []
raise CalCatError(resp)
return resp["data"]
def closest_ccv_by_time_by_condition(
self,
detector_name,
@@ -284,20 +324,16 @@ class CalCatApi(metaclass=ClientWrapper):
# afterwards, if necessary.
karabo_da = next(iter(da_to_modname)) if len(da_to_modname) == 1 else ''
resp_data = self._closest_ccv_by_time_by_condition(
detector_name,
calibration_ids,
condition,
karabo_da=karabo_da,
event_at=event_at,
pdu_snapshot_at=pdu_snapshot_at,
)
for ccv in resp_data:
try:
mod = da_to_modname[ccv['physical_detector_unit']['karabo_da']]
except KeyError:
This diff is collapsed.
import numpy as np
import pytest
import xarray as xr
from cal_tools.calcat_interface2 import (
AGIPDConditions,
CalibrationData,
DSSCConditions,
LPDConditions,
SingleConstant,
)
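# Note: these tests query the live CalCat service and load constant files
# from the facility filesystem, hence the requires_gpfs marker on each test.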
@pytest.mark.requires_gpfs
def test_AGIPD_CalibrationData_metadata():
"""Test CalibrationData with AGIPD condition"""
cond = AGIPDConditions(
# From: https://in.xfel.eu/calibration/calibration_constants/5754#condition
sensor_bias_voltage=300, # V
memory_cells=352,
acquisition_rate=2.2, # MHz
gain_mode=0,
gain_setting=1,
integration_time=12,
source_energy=9.2,
)
agipd_cd = CalibrationData.from_condition(
cond,
"MID_DET_AGIPD1M-1",
event_at="2022-09-01 13:26:48.00",
calibrations=["Offset", "SlopesFF"],
)
assert agipd_cd.detector_name == "MID_DET_AGIPD1M-1"
assert "Offset" in agipd_cd
assert set(agipd_cd["Offset"].constants) == {f"AGIPD{m:02}" for m in range(16)}
assert isinstance(agipd_cd["Offset", "AGIPD00"], SingleConstant)
assert agipd_cd["Offset", "Q1M2"] == agipd_cd["Offset", "AGIPD01"]
@pytest.mark.requires_gpfs
def test_AGIPD_merge():
cond = AGIPDConditions(
# From: https://in.xfel.eu/calibration/calibration_constants/5754#condition
sensor_bias_voltage=300, # V
memory_cells=352,
acquisition_rate=2.2, # MHz
gain_mode=0,
gain_setting=1,
integration_time=12,
source_energy=9.2,
)
agipd_cd = CalibrationData.from_condition(
cond,
"MID_DET_AGIPD1M-1",
event_at="2022-09-01 13:26:48.00",
calibrations=["Offset", "SlopesFF"],
)
modnos_q1 = list(range(0, 4))
modnos_q4 = list(range(12, 16))
merged = agipd_cd.select_modules(modnos_q1).merge(
agipd_cd.select_modules(modnos_q4)
)
assert merged.module_nums == modnos_q1 + modnos_q4
offset_only = agipd_cd.select_calibrations(["Offset"])
slopes_only = agipd_cd.select_calibrations(["SlopesFF"])
assert set(offset_only) == {"Offset"}
assert set(slopes_only) == {"SlopesFF"}
merged_cals = offset_only.merge(slopes_only)
assert set(merged_cals) == {"Offset", "SlopesFF"}
assert merged_cals.module_nums == list(range(16))
@pytest.mark.requires_gpfs
def test_AGIPD_CalibrationData_metadata_SPB():
"""Test CalibrationData with AGIPD condition"""
cond = AGIPDConditions(
sensor_bias_voltage=300,
memory_cells=352,
acquisition_rate=1.1,
integration_time=12,
source_energy=9.2,
gain_mode=0,
gain_setting=0,
)
agipd_cd = CalibrationData.from_condition(
cond,
"SPB_DET_AGIPD1M-1",
event_at="2020-01-07 13:26:48.00",
)
assert "Offset" in agipd_cd
assert set(agipd_cd["Offset"].constants) == {f"AGIPD{m:02}" for m in range(16)}
assert agipd_cd["Offset"].module_nums == list(range(16))
assert agipd_cd["Offset"].qm_names == [
f"Q{(m // 4) + 1}M{(m % 4) + 1}" for m in range(16)
]
assert isinstance(agipd_cd["Offset", 0], SingleConstant)
@pytest.mark.requires_gpfs
def test_AGIPD_load_data():
cond = AGIPDConditions(
sensor_bias_voltage=300,
memory_cells=352,
acquisition_rate=1.1,
integration_time=12,
source_energy=9.2,
gain_mode=0,
gain_setting=0,
)
agipd_cd = CalibrationData.from_condition(
cond,
"SPB_DET_AGIPD1M-1",
event_at="2020-01-07 13:26:48.00",
)
arr = agipd_cd["Offset"].select_modules(list(range(4))).xarray()
assert arr.shape == (4, 128, 512, 352, 3)
assert arr.dims[0] == "module"
np.testing.assert_array_equal(arr.coords["module"], np.arange(0, 4))
assert arr.dtype == np.float64
# Load parallel
arr_p = agipd_cd["Offset"].select_modules(list(range(4))).xarray(parallel=4)
xr.testing.assert_identical(arr_p, arr)
@pytest.mark.requires_gpfs
def test_DSSC_modules_missing():
dssc_cd = CalibrationData.from_condition(
DSSCConditions(sensor_bias_voltage=100, memory_cells=600),
"SQS_DET_DSSC1M-1",
event_at="2023-11-29 00:00:00",
)
# DSSC was used with only 3 quadrants at this point
modnos = list(range(4)) + list(range(8, 16))
assert dssc_cd.aggregator_names == [f"DSSC{m:02}" for m in modnos]
assert dssc_cd.module_nums == modnos
assert dssc_cd.qm_names == [f"Q{(m // 4) + 1}M{(m % 4) + 1}" for m in modnos]
offset = dssc_cd["Offset"]
assert offset.module_nums == modnos
# test ModulesConstantVersions.select_modules()
modnos_q3 = list(range(8, 12))
aggs_q3 = [f"DSSC{m:02}" for m in modnos_q3]
qm_q3 = [f"Q3M{i}" for i in range(1, 5)]
assert offset.select_modules(modnos_q3).module_nums == modnos_q3
assert offset.select_modules(aggregator_names=aggs_q3).module_nums == modnos_q3
assert offset.select_modules(qm_names=qm_q3).module_nums == modnos_q3
# test CalibrationData.select_modules()
assert dssc_cd.select_modules(modnos_q3).module_nums == modnos_q3
assert dssc_cd.select_modules(aggregator_names=aggs_q3).module_nums == modnos_q3
assert dssc_cd.select_modules(qm_names=qm_q3).module_nums == modnos_q3
@pytest.mark.requires_gpfs
def test_LPD_constant_missing():
lpd_cd = CalibrationData.from_condition(
LPDConditions(memory_cells=200, sensor_bias_voltage=250),
"FXE_DET_LPD1M-1",
event_at="2022-05-22T02:00:00",
)
# Constants are missing for 1 module (LPD05), but it was still included in
# the PDUs for the detector, so it should still appear in the lists.
assert lpd_cd.aggregator_names == [f"LPD{m:02}" for m in range(16)]
assert lpd_cd.module_nums == list(range(16))
assert lpd_cd.qm_names == [f"Q{(m // 4) + 1}M{(m % 4) + 1}" for m in range(16)]
# When we look at a specific constant, module LPD05 is missing
modnos_w_constant = list(range(0, 5)) + list(range(6, 16))
assert lpd_cd["Offset"].module_nums == modnos_w_constant
# Test CalibrationData.require_calibrations()
assert lpd_cd.require_calibrations(["Offset"]).module_nums == modnos_w_constant
@pytest.mark.requires_gpfs
def test_AGIPD_CalibrationData_report():
"""Test CalibrationData with data from report"""
# Report ID: https://in.xfel.eu/calibration/reports/3757
agipd_cd = CalibrationData.from_report(3757)
assert agipd_cd.detector_name == "SPB_DET_AGIPD1M-1"
assert set(agipd_cd) == {"Offset", "Noise", "ThresholdsDark", "BadPixelsDark"}
assert agipd_cd.aggregator_names == [f"AGIPD{n:02}" for n in range(16)]
assert isinstance(agipd_cd["Offset", "AGIPD00"], SingleConstant)
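A minimal usage sketch of the interface exercised above (same condition, detector name, and timestamp as `test_AGIPD_load_data`; assumes CalCat and GPFS access):
``` python
from cal_tools.calcat_interface2 import AGIPDConditions, CalibrationData

cond = AGIPDConditions(
    sensor_bias_voltage=300,
    memory_cells=352,
    acquisition_rate=1.1,
    integration_time=12,
    source_energy=9.2,
    gain_mode=0,
    gain_setting=0,
)
agipd_cd = CalibrationData.from_condition(
    cond, "SPB_DET_AGIPD1M-1", event_at="2020-01-07 13:26:48.00"
)
# Stack quadrant 1's Offset constants into one labelled array; the tests
# above check shape (4, 128, 512, 352, 3) with a leading "module" dimension.
offset_q1 = agipd_cd["Offset"].select_modules(list(range(4))).xarray()
print(offset_q1.dims, offset_q1.shape)
```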