......@@ -32,7 +32,7 @@ checks:
# GitLab... so this is a workaround for extracting the hash
- export CI_MERGE_REQUEST_TARGET_BRANCH_SHA=$(git ls-remote origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME | cut -d$'\t' -f1)
- export FILES=$(git diff $CI_COMMIT_SHA $CI_MERGE_REQUEST_TARGET_BRANCH_SHA --name-only | tr '\n' ' ')
    - python3 -m pip install pre-commit
- echo "Running pre-commit on diff from $CI_COMMIT_SHA to $CI_MERGE_REQUEST_TARGET_BRANCH_SHA ($CI_MERGE_REQUEST_TARGET_BRANCH_NAME)"
# Pass list of modified files to pre-commit so that it only checks them
- echo $FILES | xargs pre-commit run --color=always --files
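For local reproduction, a rough Python equivalent of these shell lines: resolve the target branch's SHA, list the files changed relative to it, and hand only those files to pre-commit. The branch name `master` and the use of `HEAD` are illustrative, not part of the CI config.

``` python
import subprocess

# SHA of the merge target, as the ls-remote workaround above obtains it
target_sha = subprocess.run(
    ["git", "ls-remote", "origin", "master"],
    capture_output=True, text=True, check=True,
).stdout.split("\t")[0]
# files changed between the current commit and the target branch
changed_files = subprocess.run(
    ["git", "diff", "HEAD", target_sha, "--name-only"],
    capture_output=True, text=True, check=True,
).stdout.split()
# run pre-commit only on the modified files
subprocess.run(
    ["pre-commit", "run", "--color=always", "--files", *changed_files],
    check=True,
)
```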
......@@ -42,6 +42,7 @@ pytest:
only: [merge_requests]
<<: *before_script
script:
- export LANG=C # Hopefully detect anything relying on locale
- python3 -m pip install ".[test]"
- python3 -m pytest --color yes --verbose --cov=cal_tools --cov=xfel_calibrate
# Nope... https://docs.gitlab.com/12.10/ee/user/project/merge_requests/test_coverage_visualization.html#enabling-the-feature
......
......@@ -3,19 +3,9 @@ repos:
hooks:
- id: identity
- repo: https://github.com/nbQA-dev/nbQA
rev: 0.13.0
hooks:
- id: nbqa-check-ast
- repo: https://github.com/pycqa/isort
rev: 5.7.0
hooks:
......@@ -25,10 +15,9 @@ repos:
hooks:
- id: flake8
# If `CI_MERGE_REQUEST_TARGET_BRANCH_SHA` env var is set then this will
# run flake8 on the diff of the merge request, otherwise it will run
# flake8 as it would usually execute via the pre-commit hook
entry: bash -c 'if [ -z ${CI_MERGE_REQUEST_TARGET_BRANCH_SHA} ]; then (flake8 "$@"); else (git diff $CI_MERGE_REQUEST_TARGET_BRANCH_SHA...$CI_MERGE_REQUEST_SOURCE_BRANCH_SHA | flake8 --diff); fi' --
- repo: https://github.com/myint/rstcheck
rev: 3f92957478422df87bd730abde66f089cc1ee19b # commit where pre-commit support was added
hooks:
......
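The triple-dot range `A...B` in the new entry diffs the source branch against its merge-base with the target, so flake8 only sees lines the merge request actually changes. As a hedged sketch, the entry line is roughly equivalent to this Python, with the SHAs standing in for the CI variables:

``` python
import subprocess

diff = subprocess.run(
    ["git", "diff", "TARGET_SHA...SOURCE_SHA"],  # placeholders for the CI SHAs
    capture_output=True, text=True, check=True,
).stdout
# flake8 3.x reads a unified diff on stdin with --diff and reports findings
# only on lines touched by that diff
subprocess.run(["flake8", "--diff"], input=diff, text=True)
```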
......@@ -33,6 +33,9 @@ module load texlive/2019
# make sure we use agg backend
export MPLBACKEND=AGG
# Ensure Python uses UTF-8 for files by default
export LANG=en_US.UTF-8
# start an ip cluster if requested
if [ "${ipcluster_profile}" != "NO_CLUSTER" ]
then
......
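As a side note on the `MPLBACKEND=AGG` line above: selecting the Agg backend lets matplotlib render figures without a display, which these batch jobs rely on. A minimal check, illustrative and not part of the script:

``` python
import matplotlib
matplotlib.use("agg")  # same effect as exporting MPLBACKEND=AGG
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
fig.savefig("backend_check.png")  # succeeds with no display attached
```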
%% Cell type:markdown id: tags:
# AGIPD Characterize Dark Images #
Author: S. Hauf, Version: 0.1
The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and gain-switching thresholds. All four types of constants are evaluated per pixel and per memory cell. Data for the detector's three gain stages needs to be present, recorded in separate runs.
The evaluated calibration constants are stored locally and injected into the calibration database.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
out_folder = "" # path to output to, required
sequences = [-1] # sequence files to evaluate.
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 9985 # run number in which high gain data was recorded, required
run_med = 9984 # run number in which medium gain data was recorded, required
run_low = 9983 # run number in which low gain data was recorded, required
operation_mode = "ADAPTIVE_GAIN" # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")
karabo_id = "HED_DET_AGIPD500K2G" # karabo id of the detector
karabo_da = ['-1'] # a list of data aggregator names, default ['-1'] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo id for the control device
karabo_da_control = "AGIPD500K2G00" # karabo DA for control information
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # detector bias voltage
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
rawversion = 2 # RAW file format version
thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
thresholds_offset_hard = [0, 0] # For setting the same offset threshold for all 3 gains. Kept for backwards compatibility. Default [0, 0] to use the gain-specific parameters below.
thresholds_offset_hard_hg = [3000, 7000] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_mg = [6000, 10000] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_lg = [6000, 10000] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_hg_fixed = [3500, 6500] # Same as thresholds_offset_hard_hg, but for fixed gain operation
thresholds_offset_hard_mg_fixed = [3500, 6500] # Same as thresholds_offset_hard_mg, but for fixed gain operation
thresholds_offset_hard_lg_fixed = [3500, 6500] # Same as thresholds_offset_hard_lg, but for fixed gain operation
thresholds_noise_sigma = 5. # noise sigma thresholds for noise deduced bad pixels
thresholds_noise_hard = [0, 0] # For setting the same noise threshold for all 3 gains. Kept for backwards compatibility. Default [0, 0] to use the gain-specific parameters below.
thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_gain_sigma = 5. # Gain separation sigma threshold
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells
```
%% Cell type:code id: tags:
``` python
import itertools
import multiprocessing
import os
from collections import OrderedDict
from datetime import timedelta
from typing import Tuple
import dateutil.parser
import h5py
import matplotlib
import numpy as np
import pasha as psh
import tabulate
import yaml
matplotlib.use('agg')
import iCalibrationDB
import matplotlib.pyplot as plt
from cal_tools.agipdlib import (
get_acq_rate,
get_bias_voltage,
get_gain_mode,
get_gain_setting,
get_num_cells,
)
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
from IPython.display import Latex, Markdown, display
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# insert control device if format string (does nothing otherwise)
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = run_high
offset_runs["med"] = run_med
offset_runs["low"] = run_low
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
if sequences == [-1]:
sequences = None
control_names = [f'{in_folder}/r{r:04d}/RAW-R{r:04d}-{karabo_da_control}-S00000.h5'
for r in (run_high, run_med, run_low)]
if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")
run_gain_modes = [get_gain_mode(fn, h5path_ctrl) for fn in control_names]
if all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in run_gain_modes):
fixed_gain_mode = False
if operation_mode == "FIXED_GAIN":
print("WARNING: operation_mode parameter is FIXED_GAIN, slow data indicates adaptive gain")
elif run_gain_modes == [AgipdGainMode.FIXED_HIGH_GAIN, AgipdGainMode.FIXED_MEDIUM_GAIN, AgipdGainMode.FIXED_LOW_GAIN]:
if operation_mode == "ADAPTIVE_GAIN":
print("WARNING: operation_mode parameter is ADAPTIVE_GAIN, slow data indicates fixed gain")
fixed_gain_mode = True
else:
print(f'Something is clearly wrong; slow data indicates gain modes {run_gain_modes}')
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
runs = [run_high, run_med, run_low]
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
# extract gain setting and validate that all runs have the same setting
gsettings = []
for r in runs:
control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, r, r,
karabo_da_control)
gsettings.append(get_gain_setting(control_fname, h5path_ctrl))
if not all(g == gsettings[0] for g in gsettings):
raise ValueError(f"Different gain settings for the 3 input runs {gsettings}")
gain_setting = gsettings[0]
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(f'Error: {e}')
if "component not found" in str(e):
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
```
%% Cell type:code id: tags:
``` python
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
if bias_voltage == 0:
# Read the bias voltage from files, if recorded.
# If not available, make use of the historical voltage the detector is running at
bias_voltage = get_bias_voltage(control_names[0], karabo_id_control)
bias_voltage = bias_voltage if bias_voltage is not None else 300
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}/{max_cells}")
print("Runs: {}".format([v for v in offset_runs.values()]))
print(f"Sequences: {sequences if sequences else 'All'}")
print(f"Interlaced mode: {interlaced}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
print(f"Gain setting: {gain_setting}")
print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
```
%% Cell type:code id: tags:
``` python
if thresholds_offset_hard != [0, 0]:
# if set, this will override the individual parameters
thresholds_offset_hard = [thresholds_offset_hard] * 3
elif fixed_gain_mode:
thresholds_offset_hard = [
thresholds_offset_hard_hg_fixed,
thresholds_offset_hard_mg_fixed,
thresholds_offset_hard_lg_fixed,
]
else:
thresholds_offset_hard = [
thresholds_offset_hard_hg,
thresholds_offset_hard_mg,
thresholds_offset_hard_lg,
]
print("Will use the following hard offset thresholds")
for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
print(f"- {name} gain: {value}")
if thresholds_noise_hard != [0, 0]:
thresholds_noise_hard = [thresholds_noise_hard] * 3
else:
thresholds_noise_hard = [
thresholds_noise_hard_hg,
thresholds_noise_hard_mg,
thresholds_noise_hard_lg,
]
```
%% Cell type:markdown id: tags:
The following lines create a queue of files which will then be processed module-parallel, distinguishing between the different gains.
%% Cell type:code id: tags:
``` python
# set everything up filewise
os.makedirs(out_folder, exist_ok=True)
gain_mapped_files, total_files, total_file_size = map_gain_stages(
in_folder, offset_runs, path_template, karabo_da, sequences
)
print(f"Will process a total of {total_files} files ({total_file_size:.02f} GB).")
inp = []
for gain_index, (gain, qm_file_map) in enumerate(gain_mapped_files.items()):
for module_index in modules:
qm = module_index_to_qm(module_index)
if qm not in qm_file_map:
print(f"Did not find files for {qm}")
continue
file_queue = qm_file_map[qm]
while not file_queue.empty():
filename = file_queue.get()
# TODO: remove after using EXtra-data to read files
# and skip empty trains.
with h5py.File(filename, "r") as fin:
if fin[h5path.format(module_index)+"/trainId"].shape[0] != 0:
print(f"Process {filename} for {qm}")
inp.append((filename, module_index, gain_index))
else:
print(f"Do not process {filename} because it is empty.")
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per pixel and per memory cell. Offsets are simply the median value of a set of dark data taken at a given gain, noise is the corresponding standard deviation, and gain-bit values are the medians of the gain array.
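As a minimal, self-contained sketch of these statistics on synthetic data (the `(x, y, frame)` array layout matches the code below; the actual implementation in the next cell additionally handles gain data, hard thresholds and bad pixels):

``` python
import numpy as np

rng = np.random.default_rng(0)
n_frames, n_cells = 800, 4
im = rng.normal(5000, 10, (64, 64, n_frames)).astype(np.float32)  # (x, y, frame)
cell_ids = np.tile(np.arange(n_cells), n_frames // n_cells)       # memory cell of each frame

# per-pixel, per-memory-cell offset (median) and noise (standard deviation)
offset = np.stack([np.median(im[..., cell_ids == c], axis=2) for c in range(n_cells)], axis=-1)
noise = np.stack([np.std(im[..., cell_ids == c], axis=2) for c in range(n_cells)], axis=-1)
print(offset.shape, noise.shape)  # (64, 64, 4) (64, 64, 4)
```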
%% Cell type:code id: tags:
``` python
# min() only relevant if running on multiple modules (i.e. within notebook)
parallel_num_procs = min(12, total_files)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")
def characterize_module(
fast_data_filename: str, channel: int, gain_index: int
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, float]:
if max_cells == 0:
num_cells = get_num_cells(fast_data_filename, karabo_id, channel)
else:
num_cells = max_cells
if acq_rate == 0.:
slow_paths = control_names[gain_index], karabo_id_control
fast_paths = fast_data_filename, karabo_id, channel
local_acq_rate = get_acq_rate(fast_paths, slow_paths)
else:
local_acq_rate = acq_rate
local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
local_thresholds_noise_hard = thresholds_noise_hard[gain_index]
h5path_f = h5path.format(channel)
h5path_idx_f = h5path_idx.format(channel)
with h5py.File(fast_data_filename, "r") as infile:
if rawversion == 2:
count = np.squeeze(infile[f"{h5path_idx_f}/count"])
first = np.squeeze(infile[f"{h5path_idx_f}/first"])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile[f"{h5path_idx_f}/status"])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile[f"{h5path_idx_f}/last"])
first = np.squeeze(infile[f"{h5path_idx_f}/first"])
last_index = int(last[status != 0][-1]) + 1
first_index = int(first[status != 0][0])
im = np.array(infile[f"{h5path_f}/data"][first_index:last_index,...])
cell_ids = np.squeeze(infile[f"{h5path_f}/cellId"][first_index:last_index,...])
if interlaced:
if not fixed_gain_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
cell_ids = cell_ids[::2]
else:
if not fixed_gain_mode:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.transpose(im)
if not fixed_gain_mode:
ga = np.transpose(ga)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], num_cells), dtype=np.float64)
noise = context.alloc(like=offset)
if fixed_gain_mode:
gains = None
gains_std = None
else:
gains = context.alloc(like=offset)
gains_std = context.alloc(like=offset)
def process_cell(worker_id, array_index, cell_number):
cell_slice_index = (cell_ids == cell_number)
im_slice = im[..., cell_slice_index]
offset[..., cell_number] = np.median(im_slice, axis=2)
noise[..., cell_number] = np.std(im_slice, axis=2)
if not fixed_gain_mode:
ga_slice = ga[..., cell_slice_index]
gains[..., cell_number] = np.median(ga_slice, axis=2)
gains_std[..., cell_number] = np.std(ga_slice, axis=2)
context.map(process_cell, np.unique(cell_ids))
# bad pixels
bp = np.zeros_like(offset, dtype=np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[(offset < local_thresholds_offset_hard[0]) |
(offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
return offset, noise, gains, gains_std, bp, num_cells, local_acq_rate
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(characterize_module, inp)
```
%% Cell type:code id: tags:
``` python
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
if not fixed_gain_mode:
gain_g = OrderedDict()
gainstd_g = OrderedDict()
all_cells = []
all_acq_rate = []
for (_, module_index, gain_index), (offset, noise, gains, gains_std, bp,
thiscell, thisacq) in zip(inp, results):
all_cells.append(thiscell)
all_acq_rate.append(thisacq)
qm = module_index_to_qm(module_index)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
if not fixed_gain_mode:
gain_g[qm] = np.zeros_like(offset_g[qm])
gainstd_g[qm] = np.zeros_like(offset_g[qm])
offset_g[qm][..., gain_index] = offset
noise_g[qm][..., gain_index] = noise
badpix_g[qm][..., gain_index] = bp
if not fixed_gain_mode:
gain_g[qm][..., gain_index] = gains
gainstd_g[qm][..., gain_index] = gains_std
max_cells = np.max(all_cells)
print(f"Using {max_cells} memory cells")
acq_rate = np.max(all_acq_rate)
print(f"Using {acq_rate} MHz acquisition rate")
```
%% Cell type:code id: tags:
``` python
# Add bad pixels due to bad gain separation
if not fixed_gain_mode:
for qm in gain_g.keys():
for g in range(2):
# Bad pixels during bad gain separation.
# Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
BadPixels.GAIN_THRESHOLDING_ERROR
```
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels: for example, a pixel-cell with a median gain-bit value of 5500 ADU in high gain and 7500 ADU in medium gain gets its HG-MG switching threshold at 6500 ADU. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
thresholds_g = {}
for qm in gain_g.keys():
thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
for i in range(3):
thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[qm] = {
'Offset': offset_g[qm],
'Noise': noise_g[qm],
'BadPixelsDark': badpix_g[qm]
}
if not fixed_gain_mode:
res[qm]['ThresholdsDark'] = thresholds_g[qm]
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
report = get_report(out_folder)
```
%% Cell type:code id: tags:
``` python
# set the operating condition
# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
condition = iCalibrationDB.Conditions.Dark.AGIPD(
memory_cells=max_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
gain_mode=fixed_gain_mode
)
```
%% Cell type:code id: tags:
``` python
# Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
qm_dict = OrderedDict()
all_pdus = get_pdu_from_db(
karabo_id,
karabo_da,
constant=iCalibrationDB.CalibrationConstant(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time.isoformat(),
timeout=cal_db_timeout
)
for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
qm = module_index_to_qm(module_index)
qm_dict[qm] = {
"karabo_da": module_da,
"db_module": module_pdu
}
```
%% Cell type:code id: tags:
``` python
md = None
for qm in res:
db_module = qm_dict[qm]["db_module"]
for const in res[qm]:
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
dconst.data = res[qm][const]
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
report, cal_db_interface, creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• gain_mode: {fixed_gain_mode}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
qm_x_const = [(qm, const) for qm in res for const in res[qm]]
def retrieve_old_constant(qm, const):
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
# This should be used in case of running notebook
# by a different method other than myMDC which already
# sends CalCat info.
# TODO: Set db_module to "" by default in the first cell
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=qm_dict[qm]["karabo_da"],
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=1),
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, qm_x_const
)
old_retrieval_pool.close()
```
%% Cell type:code id: tags:
``` python
mnames=[]
for i in modules:
qm = module_index_to_qm(i)
mnames.append(qm)
display(Markdown(f'## Position of the module {qm} and its ASICs'))
show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow one to identify potential effects common to all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while otherwise no immediate artifacts are visible.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
if high_res_badpix_3d:
cols = {
BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
}
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of the bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This rebinning excludes single bad pixels that are isolated in space and memory cells.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
gnames = ['High Gain', 'Medium Gain', 'Low Gain']
for gain in range(3):
display(Markdown(f'### {gnames[gain]} ###'))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate Values and Per-Cell Behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as of the per-cell behaviour.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 8000,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
# Plot only three gain threshold maps.
bp_thresh = OrderedDict()
for mod, con in badpix_g.items():
bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
bp_thresh[mod][...,:2] = con[...,:2]
bp_thresh[mod][...,2:] = con
create_constant_overview(thresholds_g, "Threshold (ADU)", max_cells, 4000, 10000, 5,
badpixels=[bp_thresh, np.nan],
gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'Low gain'],
marker=['d','d','','','']
)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
bad_pixel_aggregate_g[m] = d.astype(bool).astype(np.float64)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, 0, 0.10, 3)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
old_const.setdefault(qm, {})[const] = data
old_mdata.setdefault(qm, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("The following pre-existing constants are used for comparison:"))
for qm, consts in old_mdata.items():
display(Markdown(f"- {qm}"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_dict[qm]["db_module"],
"old-constants": old_mdata[qm]
},
fd,
)
```
%% Cell type:code id: tags:
``` python
table = []
gain_names = ['High', 'Medium', 'Low']
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
for qm in badpix_g.keys():
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_g[qm][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))
if old_const[qm]['BadPixelsDark'] is not None:
dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma} {thresholds_noise_hard[gain]}',
f'{thresholds_offset_sigma} {thresholds_offset_hard[gain]}',
'', f'{thresholds_gain_sigma}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]
if old_const[qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels
One pixel can be bad for several reasons; therefore, the sum over all types of bad pixels can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
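%% Cell type:markdown id: tags:
To see why the per-type counts can sum to more than the total, here is a small self-contained illustration of the bitwise counting used above (the flag values are stand-ins, not the real `BadPixels` codes):
``` python
import numpy as np

OFFSET_OUT_OF_THRESHOLD = np.uint32(1 << 0)  # illustrative bit values only
NOISE_OUT_OF_THRESHOLD = np.uint32(1 << 1)

# three pixels: good, bad for one reason, bad for two reasons
bp = np.array([0, OFFSET_OUT_OF_THRESHOLD,
               OFFSET_OUT_OF_THRESHOLD | NOISE_OUT_OF_THRESHOLD], dtype=np.uint32)
total_bad = np.count_nonzero(bp)  # 2
per_type = [np.count_nonzero(bp & bit)
            for bit in (OFFSET_OUT_OF_THRESHOLD, NOISE_OUT_OF_THRESHOLD)]
print(total_bad, per_type, sum(per_type))  # 2 [2, 1] 3, the per-type sum exceeds the total
```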
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
if fixed_gain_mode:
constants = ['Offset', 'Noise']
else:
constants = ['Offset', 'Noise', 'ThresholdsDark']
constants_x_qms = list(itertools.product(constants, res.keys()))
def compute_table(const, qm):
if const == 'ThresholdsDark':
table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
else:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
compare_with_old_constant = old_const[qm][const] is not None and \
old_const[qm]['BadPixelsDark'] is not None
data = np.copy(res[qm][const])
if const == 'ThresholdsDark':
data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data[res[qm]['BadPixelsDark']>0] = np.nan
if compare_with_old_constant:
data_old = np.copy(old_const[qm][const])
if const == 'ThresholdsDark':
data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data_old[old_const[qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
def compute_row(i):
line = [n_list[i]]
for gain in range(3):
# compare only the two threshold maps (HG-MG, MG-LG)
if gain == 2 and const == 'ThresholdsDark':
continue
stat_measure = f_list[i](data[...,gain])
line.append(f"{stat_measure:6.1f}")
if compare_with_old_constant:
old_stat_measure = f_list[i](data_old[...,gain])
line.append(f"{old_stat_measure:6.1f}")
else:
line.append("-")
return line
with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
rows = pool.map(compute_row, range(len(f_list)))
table.extend(rows)
return table
with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
tables = pool.starmap(compute_table, constants_x_qms)
for (const, qm), table in zip(constants_x_qms, tables):
display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # cluster profile to use
in_folder = "/gpfs/exfel/exp/FXE/201931/p900088/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/karnem/test_1/lpd_correct_006" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 270 # runs to process, required
karabo_id = "FXE_DET_LPD1M-1" # karabo id of the detector
karabo_da = ['-1'] # a list of data aggregator names, default ['-1'] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
use_dir_creation_date = True # use the creation date of the directory for database time derivation
cal_db_interface = "tcp://max-exfl016:8015#8020" # the database interface to use
cal_db_timeout = 30000 # timeout for calibration db requests in milliseconds
calfile = "/gpfs/exfel/data/scratch/xcal/lpd_store_0519.h5" # path to constants extracted from the db into a file
mem_cells = 512 # memory cells in data
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
no_flat_fields = False # do not do flat field correction
max_pulses = 512 # maximum number of pulses per train
no_non_linear_corrections = False # do not apply non-linear corrections
max_cells_db = 512 # maximum cells for data from the database
rawversion = 2 # raw format version
capacitor = '5pF' # capacitor setting: 5pF or 50pF
photon_energy = 9.2 # the photon energy in keV
nodb = False # set to true if db input is to be avoided
bias_voltage = 250 # detector bias voltage
geometry_file = "/gpfs/exfel/d/cal/exchange/lpdMF_00.h5" # the geometry file to use, MAR 2018
beam_center_offset = [1.5, 1] # offset from the beam center, MAR 2018
sequences_per_node = 1 # sequence files to process per node
dont_mark_non_lin_region = False # do not mark non-linear regions in BP map
linear_between_high_gain = [-5000, 2500] # region in which high gain is considered linear, in ADU
linear_between_med_gain = [300, 3000] # region in which medium gain is considered linear, in ADU
linear_between_low_gain = [300, 3000] # region in which low gain is considered linear, in ADU
nlc_version = 2 # version of NLC to use
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import sys
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')
max_cells = mem_cells
if sequences[0] == -1:
sequences = None
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ['LPD{:02d}'.format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules: ",
', '.join([f"Q{x // 4 + 1}M{x % 4 + 1}" for x in modules]))
do_rel_gain = not no_relative_gain
do_ff = not no_flat_fields
index_v = rawversion
#do_ff = False
#relgain_store = "/gpfs/exfel/d/proc/FXE/201830/p900020/calibration/lpd_ci_store_{}_16_5pf.h5"
print("Applying FF corrections: {}".format(do_ff))
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import h5py
import matplotlib
import numpy as np
matplotlib.use("agg")
from collections import OrderedDict
from datetime import datetime
import matplotlib.pyplot as plt
from cal_tools.enums import BadPixels
from cal_tools.lpdlib import LpdCorrections
from cal_tools.plotting import create_constant_overview, plot_badpix_3d, show_overview
from cal_tools.tools import (
gain_map_files,
get_constant_from_db,
get_dir_creation_date,
get_notebook_name,
map_modules_from_folder,
parse_runs,
run_prop_seq_from_path,
)
from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions
from ipyparallel import Client
print("Connecting to profile {}".format(cluster_profile))
view = Client(profile=cluster_profile)[:]
view.use_dill()
gains = np.arange(3)
cells = np.arange(max_cells)
CHUNK_SIZE = 512
MAX_PAR = 32
if not os.path.exists(out_folder):
os.makedirs(out_folder)
elif not overwrite:
raise AttributeError("Output path exists! Exiting")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
else:
creation_time = datetime.now()
print("Using {} as creation time".format(creation_time.isoformat()))
_, proposal, seq = run_prop_seq_from_path(in_folder)
instrument = karabo_id.split("_")[0]
mark_non_lin_region = not dont_mark_non_lin_region
linear_between = [linear_between_high_gain, linear_between_med_gain, linear_between_low_gain]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
MAX_PAR = min(MAX_PAR, total_sequences)
```
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
import tabulate
from IPython.display import HTML, Latex, Markdown, display
print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
table = []
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
i = 0
while not files.empty():
f = files.get()
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
```
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses,
dbparms, fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between,
nlc_version, h5path, h5path_idx, karabo_id, inp):
import copy
import os
import re
import socket
from datetime import datetime
import h5py
import numpy as np
from cal_tools.enums import BadPixels
from cal_tools.lpdlib import LpdCorrections
hists_signal_low = None
hists_signal_high = None
hists_gain_vs_signal = None
low_edges = None
high_edges = None
signal_edges = None
when = None
qm = None
err = None
try:
start = datetime.now()
success = True
reason = ""
filename, filename_out, channel, karabo_da, qm = inp
infile = h5py.File(filename, "r", driver="core")
outfile = h5py.File(filename_out, "w")
# LPD correction requires paths without the leading "/"
if h5path[0] == '/':
h5path = h5path[1:]
if h5path_idx[0] == '/':
h5path_idx = h5path_idx[1:]
try:
lpd_corr = LpdCorrections(infile, outfile, max_cells, channel, max_pulses,
bins_gain_vs_signal, bins_signal_low_range,
bins_signal_high_range, do_ff=do_ff, raw_fmt_version=index_v,
correct_non_linear=(not no_non_linear_corrections),
mark_non_lin_region=mark_non_lin_region, linear_between=linear_between,
nlc_version=nlc_version,
h5_data_path=h5path, h5_index_path=h5path_idx)
try:
lpd_corr.get_valid_image_idx()
except IOError:
return
if not nodb:
when = lpd_corr.initialize_from_db(dbparms, karabo_id, karabo_da, only_dark=(fileparms != ""))
print(when)
if fileparms != "":
lpd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
print("Initialized constants")
for irange in lpd_corr.get_iteration_range():
lpd_corr.correct_lpd(irange)
print("All iterations finished")
hists, edges = lpd_corr.get_histograms()
hists_signal_low, hists_signal_high, hists_gain_vs_signal = hists
low_edges, high_edges, signal_edges = edges
outfile.close()
infile.close()
print("Closed files")
except Exception as e1:
err = e1
outfile.close()
infile.close()
except Exception as e:
print(e)
success = False
reason = "Error"
err = e
return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, low_edges,
high_edges, signal_edges, when, qm, err)
done = False
first_files = []
inp = []
left = total_sequences
bins_gain_vs_signal = (100, 4)
bins_signal_low_range = 100
bins_signal_high_range = 100
hists_signal_low = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_signal_high = np.zeros((bins_signal_high_range, max_pulses), np.float64)
hists_gain_vs_signal = np.zeros((bins_gain_vs_signal), np.float64)
low_edges, high_edges, signal_edges = None, None, None
dbparms = cal_db_interface, creation_time, max_cells_db, capacitor, bias_voltage, photon_energy, cal_db_timeout
fileparms = calfile
whens = {}
while not done:
dones = []
first = True
for i, k_da in zip(modules, karabo_da):
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = str(mapped_files[qm].get())
dones.append(mapped_files[qm].empty())
else:
print("Skipping {}".format(qm))
first_files.append((None, None))
continue
fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
if first:
first_files.append((fname_in, fout))
inp.append((fname_in, fout, i, k_da, qm))
first = False
if len(inp) >= min(MAX_PAR, left):
print("Running {} tasks parallel".format(len(inp)))
p = partial(correct_module, max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses, dbparms,
fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between, nlc_version,
h5path, h5path_idx, karabo_id)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
inp = []
left -= MAX_PAR
for rr in r:
if rr is not None:
hl, hh, hg, low_edges, high_edges, signal_edges, when, qm, err = rr
whens[qm] = {}
whens[qm]['when'] = when
whens[qm]['err'] = err
if hl is not None: # any one being None will also make the others None
hists_signal_low += hl.astype(np.float64)
hists_signal_high += hh.astype(np.float64)
hists_gain_vs_signal += hg.astype(np.float64)
done = all(dones)
```
%% Cell type:code id: tags:
``` python
print("Offset was injected on: ")
for k, v in whens.items():
if v['err'] is None:
print("{}: {}".format(k, v['when']))
else:
print("{}: {}: {}".format(k, v['when'], v['err']))
```
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.ticker import FormatStrFormatter, LinearLocator
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10,10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
```
%% Cell type:markdown id: tags:
## Signal vs. Analogue Gain ##
The following plot shows signal vs. analogue gain for the first 1280 images.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
from matplotlib.colors import LogNorm
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse in a detailed and expanded intensity region.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
## Data Preview ##
In the following, geometry information from the LPD geometry file is applied. Quadrants are positioned according to their last known positions. No bad pixel masking has been performed.
%% Cell type:code id: tags:
``` python
# geometry information
dc = beam_center_offset
#d_quads = [(-14+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],15+dc[1]),(-280+dc[0],-276+dc[1])] # MAR 2018
d_quads = [(-19+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],19+dc[1]),(-285+dc[0],-271+dc[1])] # MAY 2019
import cal_tools.metrology as metro
in_files = "{}/CORR*LPD*S{:05d}*.h5".format(out_folder, sequences[0] if sequences else 0)
datapath = "{}/image/data".format(h5path)
print("Preview is from {}".format(in_files))
```
%% Cell type:code id: tags:
``` python
posarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 10)
maskpath = "{}/image/mask".format(h5path)
maskedarr = metro.positionFileList(in_files, maskpath, geometry_file, d_quads, nImages = 10)
```
%% Cell type:code id: tags:
``` python
# convert the Cartesian coordinates of the detector to polar coordinates
def mod_cart_to_pol(d, dx, dy, filter_by_val=True):
""" Convert Cartesian coords to polar coords
"""
cx, cy = d.shape
x = np.arange(cx)+dx
y = np.arange(cy)+dy
x = np.repeat(x[:,None], cy, axis=1)
y = np.repeat(y[None,:], cx, axis=0)
rho = np.sqrt(x**2 + y**2).flatten()
phi = np.arctan2(y, x).flatten()
flat = d.flatten()
# we also perform a bit of filtering here
if filter_by_val:
good = np.isfinite(flat) & (flat > 0) & (flat < 1e5)
return rho[good], phi[good], flat[good], good
return rho, phi, flat, None
```
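%% Cell type:markdown id: tags:
A quick usage sketch of the helper above on a synthetic image; the offsets of -750 mirror those used further below and place the beam hole at the coordinate origin:
``` python
import numpy as np

img = np.random.exponential(10., (1500, 1500))  # stand-in for an assembled detector image
rho, phi, flat, good = mod_cart_to_pol(img, -750, -750)
print(rho.shape, phi.min(), phi.max())  # filtered 1D arrays, phi in [-pi, pi]
```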
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single-shot image from cell 5 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = posarr[5,...]
im = ax.imshow(parr, vmin=0, vmax=max(10*np.median(parr[parr > 0]), 100))
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU)")
```
%% Cell type:markdown id: tags:
### Pixel Mean Preview ###
The per-pixel mean value of the first 10 images
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = np.mean(posarr, axis=0)
im = ax.imshow(parr, vmin=0, vmax=max(10*np.median(parr[parr > 0]), 100))
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU)")
```
%% Cell type:markdown id: tags:
### Radial Profile ###
The simple azimuthally integrated profile plotted here assumes the beam is centred in the hole; it is thus not always fully accurate.
%% Cell type:code id: tags:
``` python
# Here we create histograms of the data in a polar coordinate system.
# We use scipy's binned_statistic_2d function, giving it the polar coordinates
# of each pixel, and weighting each coordinate with the pixel's value.
# We obtain a histogram for each module, according to its position defined
# in the coord_list.
from scipy.stats import binned_statistic_2d
hs = []
bins_nums = []
edges = []
goods = []
bins = 5000
dx, dy = -750, -750
rho, phi, weights, good = mod_cart_to_pol(np.mean(posarr, axis=0), dy, dx, False)
#h, phi_edges, rho_edges = np.histogram2d(phi, rho, bins=(1000,1000),
# range=((-np.pi, np.pi), (0, 1000)),
# weights=weights)
h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),
range=((-np.pi, np.pi), (0, 1000)),
statistic = "sum")
bins_nums.append(bns)
hs.append(h)
edges.append((phi_edges, rho_edges))
goods.append(good)
```
%% Cell type:code id: tags:
``` python
x = np.arange(bins)/bins*1000*500e-6
y = np.arange(bins)/bins*2.
ds = np.array(hs).sum(axis=0)
```
%% Cell type:code id: tags:
``` python
# With appropriate coordinates given, plotting a profile along the
# vertical axis should give us the positions of the diffraction peaks,
# Here still as distances on the detector plane. With knowledge of the
# detector-to-sample distance, these could then be converted into
# reciprocal coordinates.
ds[ds == 0] = np.nan
profile = np.nanmedian(ds, axis=0)
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(111)
p = ax.plot(x, profile)
l = ax.set_ylabel("Median intensity (arb. units)")
l = ax.set_xlabel("Radial distance (arb. units)")
```
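%% Cell type:markdown id: tags:
With a known sample-detector distance, the radial axis can be converted to reciprocal coordinates. A hedged sketch follows: the distance is a placeholder, and the wavelength is derived from the 9.2 keV photon energy set in the parameters.
``` python
import numpy as np

detector_distance = 0.5  # sample-detector distance in metres, placeholder value
wavelength = 12.398 / 9.2 * 1e-10  # lambda[m] from E[keV]: lambda(A) = 12.398 / E
# `x` from the cells above is the radial distance on the detector plane in metres
two_theta = np.arctan2(x, detector_distance)
q = 4 * np.pi / wavelength * np.sin(two_theta / 2)  # momentum transfer in 1/m
```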
%% Cell type:markdown id: tags:
## Maximum Gain Value Reached ##
The following plot shows the maximum gain value reached. It can be used as an indication of whether the detector went into saturation.
%% Cell type:code id: tags:
``` python
gainpath = "{}/image/gain".format(h5path)
posarr = metro.positionFileList(in_files, gainpath, geometry_file, d_quads, nImages = 100)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = np.max(posarr, axis=0)
im = ax.imshow(parr, vmin=0, vmax=3)
cb = fig.colorbar(im)
cb.set_label("Gain value")
```
......
%% Cell type:markdown id: tags:
# Test Notebook - CLI
Author: Robert Rosca
Version: 0.1
Notebook for use with the unit and continuous integration tests.
%% Cell type:code id: tags:
```
root = "root/path/for/nb" # variables included in the user notebook path must
# also be defined in the notebook itself
in_folder = "./" # input folder
out_folder = "./" # output folder
list_normal = [10] # parameterized list, range allowed
list_intellilist = [2345] # parameterized list with ranges, range allowed
concurrency_parameter = [1] # concurrency parameter, range allowed
number = 0 # parameterized number
```
%% Cell type:markdown id: tags:
Tests notebook execution by creating an empty file, named `touch`, in the input directory.
%% Cell type:code id: tags:
```
from pathlib import Path
```
%% Cell type:code id: tags:
```
in_folder = Path(in_folder)
(in_folder / "touch").touch()
```
%% Cell type:markdown id: tags:
Include some non-ascii characters to check that files are reliably processed as UTF-8. 🤖
%% Cell type:code id: tags:
```
print("🥼")
```
......
[build-system]
requires = ["cython==0.29.21", "numpy==1.20.3", "setuptools>=40.8.0", "wheel"]
[tool.isort]
profile = "black"
......
......@@ -84,7 +84,6 @@ setup(
"dill==0.3.0",
"extra_data==1.4.1",
"extra_geom==1.1.1",
"gitpython==3.1.0",
"h5py==2.10.0",
"iminuit==1.3.8",
......@@ -104,7 +103,7 @@ setup(
"nbformat==5.0.7",
"nbparameterise==0.5",
"notebook==6.1.5",
"numpy==1.20.3",
"pasha==0.1.0",
"prettytable==0.7.2",
"princess==0.2",
......
......@@ -7,6 +7,9 @@ from typing import Any, Dict, Optional, Tuple
import h5py
import numpy as np
import sharedmem
from iCalibrationDB import Conditions, Constants
from cal_tools import agipdalgs as calgs
from cal_tools.agipdutils import (
assemble_constant_dict,
baseline_correct_via_noise,
......@@ -19,9 +22,6 @@ from cal_tools.agipdutils import (
)
from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
from cal_tools.tools import get_constant_from_db_and_time
def get_num_cells(fname, loc, module):
......
......@@ -2,11 +2,12 @@ import copy
from typing import Tuple
import numpy as np
from scipy.signal import cwt, find_peaks_cwt, ricker
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
def assemble_constant_dict(
corr_bools,
......
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
from iminuit import Minuit
from cal_tools.enums import BadPixelsFF
def any_in(mask: np.ndarray, bits: int) -> bool:
return mask.astype(np.uint) & bits > 0
......@@ -106,7 +107,7 @@ def fit_n_peaks(x: np.ndarray,
:param n_peaks: Number of Gaussian peaks to fit (min 2, max 4)
:param fix_d01: Fix position of peaks to the distance between noise and
first photon peak.
:param sigma_limit: Repeat fit keeping only bins within mu +-
sigma_limit sigma
:return: minuit object
"""
......@@ -163,7 +164,7 @@ def fit_n_peaks(x: np.ndarray,
minuit = Minuit(chi2_f, **pars, pedantic=False)
minuit.migrad()
if sigma_limit > 0:
res = minuit.fitarg
sel2 = (np.abs(x - res['g0mean']) < sigma_limit*res['g0sigma']) | \
......@@ -172,7 +173,7 @@ def fit_n_peaks(x: np.ndarray,
(np.abs(x - res['g3mean']) < sigma_limit*res['g3sigma'])
sel = sel & sel2
minuit.migrad()
if do_minos:
if minuit.get_fmin().is_valid:
minuit.minos()
......
......@@ -3,9 +3,10 @@ from typing import Optional, Tuple
import h5py
import numpy as np
from iCalibrationDB import Conditions, Constants, Detectors
from cal_tools.enums import BadPixels
from cal_tools.tools import get_constant_from_db, get_constant_from_db_and_time
class LpdCorrections:
......
......@@ -358,6 +358,10 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
try:
with h5py.File(file, 'r') as f:
# Band-aid fix to work around empty module files.
if datapath.format(ch) not in f:
continue
if trainIds is None:
d = np.squeeze(f[datapath.format(ch)][()] if nImages == 'all' else f[datapath.format(ch)][:nImages,:,:])
else:
......
......@@ -14,8 +14,12 @@ from matplotlib import colors
from matplotlib.patches import Patch
from mpl_toolkits.axes_grid1 import AxesGrid
plt.rcParams["mpl_toolkits.legacy_colorbar"] = False
def show_overview(
d, cell_to_preview, gain_to_preview, out_folder=None, infix=None
):
"""
Show an overview
:param d: A dict with the number of modules and
......@@ -40,8 +44,9 @@ def show_overview(d, cell_to_preview, gain_to_preview, out_folder=None, infix=No
cbar_size="7%",
cbar_pad="2%",
)
items = list(data.items())
for ax, cbar_ax, (key, item) in zip(grid, grid.cbar_axes, items):
cf = 0
if "ThresholdsDark" in key:
cf = -1
......@@ -60,11 +65,14 @@ def show_overview(d, cell_to_preview, gain_to_preview, out_folder=None, infix=No
item[..., cell_to_preview, gain_to_preview + cf].size > 0.01): # noqa
bound *= 2
is_badpixels = "BadPixels" in key
if is_badpixels:
im = ax.imshow(
item[..., cell_to_preview, gain_to_preview + cf] != 0,
cmap=plt.cm.colors.ListedColormap(["w", "k"]),
aspect="auto",
)
else:
if len(item.shape) == 4:
......@@ -78,27 +86,33 @@ def show_overview(d, cell_to_preview, gain_to_preview, out_folder=None, infix=No
im_prev = np.moveaxis(item[..., cell_to_preview], 0, 1)
vmax = med + np.abs(bound * medscale)
im = ax.imshow(im_prev, interpolation="nearest",
vmin=med - np.abs(bound * medscale),
vmax=vmax, aspect='auto')
cb = cbar_ax.colorbar(im)
if is_badpixels:
cb.set_ticks([0.25, 0.75])
cb.set_ticklabels(["good", "bad"])
else:
cb.set_label("ADU")
ax.text(
5, 20, key, color="k" if is_badpixels else "w", fontsize=20
)
grid[0].text(5, 50, module, color="k" if "BadPixels" in items[0][0] else "r", fontsize=20) # noqa
if out_folder and infix:
fig.savefig(f"{out_folder}/"
f"dark_analysis_{infix}_module_{module}.png")
def rebin(a, *args):
'''rebin ndarray data into a smaller ndarray of the same rank whose
dimensions are factors of the original dimensions. eg. An array with 6
columns and 4 rows can be reduced to have 6,3,2 or 1 columns and 4,2 or 1
rows. example usages:
https://scipy-cookbook.readthedocs.io/items/Rebinning.html
>>> a=rand(6,4); b=rebin(a,3,2)
>>> a=rand(6); b=rebin(a,2)
......@@ -116,7 +130,12 @@ def rebin(a, *args):
def plot_badpix_3d(data, definitions, title=None, rebin_fac=2, azim=22.5):
od = data
d, dims = rebin(od.astype(np.uint32), od.shape[0] // rebin_fac, od.shape[1] // rebin_fac, od.shape[2])
d, dims = rebin(
od.astype(np.uint32),
od.shape[0] // rebin_fac,
od.shape[1] // rebin_fac,
od.shape[2],
)
xx, yy, zz = dims
voxels = d.astype(bool)
colors = np.full(voxels.shape, '#FFFFFF')
......@@ -137,7 +156,7 @@ def plot_badpix_3d(data, definitions, title=None, rebin_fac=2, azim=22.5):
ax.set_zlim(0, np.max(zz))
for k, c in cols.items():
ax.plot([-1, ], [-1, ], color=c[1], label=c[0])
ax.legend()
if title:
ax.set_title(title)
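# A minimal sketch of the reshape-based rebinning idea behind rebin() above
# (cf. the scipy-cookbook link in its docstring): each output cell
# aggregates a block of input cells. Shapes here are example values.
#
#     >>> import numpy as np
#     >>> a = np.arange(24, dtype=float).reshape(6, 4)
#     >>> a.reshape(3, 2, 2, 2).mean(axis=(1, 3)).shape   # (6, 4) -> (3, 2)
#     (3, 2)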
......
......@@ -532,6 +532,8 @@ def get_from_db(karabo_id: str, karabo_da: str,
ntries -= 1
timeout *= 2
sleep(np.random.randint(30))
# TODO: reevaluate the need for doraise
# and remove if not needed.
if ntries == 0 and doraise:
raise
except Exception as e:
......@@ -547,14 +549,24 @@ def get_from_db(karabo_id: str, karabo_da: str,
if ntries > 0:
mdata_const = metadata.calibration_constant_version
if load_data and meta_only:
hdf5path = getattr(mdata_const, 'hdf5path', None)
filename = getattr(mdata_const, 'filename', None)
h5path = getattr(mdata_const, 'h5path', None)
if not (hdf5path and filename and h5path):
raise ValueError(
"Wrong metadata received to access the constant data."
f" Retrieved constant filepath is {hdf5path}/{filename}" # noqa
f" and data_set_name is {h5path}."
)
with h5py.File(Path(hdf5path, filename), "r") as f:
arr = f[f"{h5path}/data"][()]
metadata.calibration_constant.data = arr
if verbosity > 0:
if constant.name not in already_printed or verbosity > 1:
already_printed[constant.name] = True
# TODO: Reset mdata_const.begin_at
# if comm_db_success is False.
begin_at = mdata_const.begin_at
print(f"Retrieved {constant.name} "
f"with creation time: {begin_at}")
......@@ -601,6 +613,12 @@ def send_to_db(db_module: str, karabo_id: str, constant, condition,
if report_path:
# calibration_client expects a dict describing the injected report,
# with a value of at least 2 characters for each key.
if not isinstance(report_path, str) or len(report_path) < 2:
raise TypeError(
"\"report_path\" needs to be a string "
"of at least 2 characters."
)
report = {"name": path.basename(report_path),
"file_path": report_path}
metadata.calibration_constant_version.report_path = report
......@@ -609,13 +627,17 @@ def send_to_db(db_module: str, karabo_id: str, constant, condition,
metadata.calibration_constant_version.device_name = db_module
metadata.calibration_constant_version.karabo_da = None
metadata.calibration_constant_version.raw_data_location = file_loc
if constant.data is None:
raise ValueError(
"There is no data available to "
"inject to the database."
)
while ntries > 0:
this_interface = get_random_db_interface(cal_db_interface)
try:
metadata.send(this_interface, timeout=timeout)
success = True # TODO: use comm_db_success
break
except zmq.error.Again:
ntries -= 1
......
......@@ -3,6 +3,7 @@
import argparse
import ast
import inspect
import locale
import math
import os
import pprint
......@@ -859,6 +860,9 @@ def make_pipeline_yaml(parms, version, report_path, output_dir):
def run():
""" Run a calibration task with parser arguments """
# Ensure files are opened as UTF-8 by default, regardless of environment.
locale.setlocale(locale.LC_CTYPE, ('en_US', 'UTF-8'))
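# Illustrative note (an assumption, not part of the tool): with LC_CTYPE set
# as above, `locale.getpreferredencoding(False)` reports "UTF-8", which is
# what `open()` uses by default when no encoding argument is passed.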
parser = make_extended_parser()
args = deconsolize_args(vars(parser.parse_args()))
detector = args["detector"].upper()
......
......@@ -102,7 +102,7 @@ notebooks = {
"notebook": "notebooks/LPD/Inject_calibration_constants_from_h5files.ipynb",
"concurrency": {"parameter": None,
"default concurrency": None,
"cluster cores": 1},
"cluster cores": 1},
}
},
"PNCCD": {
......@@ -243,7 +243,7 @@ notebooks = {
"CORRECT": {
"notebook": None,
"user": {
"notebook": "/gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/usr/calibration/notebooks/correct.ipynb",
"notebook": "/gpfs/exfel/exp/SQS/202121/p002926/usr/calibration/notebooks/correct.ipynb",
"venv": "/gpfs/exfel/sw/software/exfel_environments/sqs-remi-preview"
},
"concurrency": {
......
......@@ -7,7 +7,7 @@ def pytest_addoption(parser):
parser.addoption(
"--no-gpfs",
action="store_true",
default="false",
default=False,
help="Skips tests marked as requiring GPFS access",
)
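# A possible companion hook (a sketch, not in this diff) showing how the
# --no-gpfs flag can gate tests; the marker name matches the `requires_gpfs`
# marker used in the test suite.
import pytest

def pytest_collection_modifyitems(config, items):
    if not config.getoption("--no-gpfs"):
        return
    skip_gpfs = pytest.mark.skip(reason="GPFS access disabled via --no-gpfs")
    for item in items:
        if "requires_gpfs" in item.keywords:
            item.add_marker(skip_gpfs)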
......
import numpy as np
from cal_tools.agipdutils_ff import get_mask, set_par_limits
......
from datetime import datetime
from pathlib import Path
from unittest.mock import patch
import numpy as np
import pytest
import zmq
from iCalibrationDB import Conditions, ConstantMetaData, Constants
from cal_tools.agipdlib import AgipdCorrections
from cal_tools.plotting import show_processed_modules
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
module_index_to_qm,
send_to_db,
)
# AGIPD operating conditions.
ACQ_RATE = 1.1
BIAS_VOLTAGE = 300
GAIN_SETTING = 0
MEM_CELLS = 352
PHOTON_ENERGY = 9.2
AGIPD_KARABO_ID = "SPB_DET_AGIPD1M-1"
WRONG_AGIPD_MODULE = "AGIPD_**"
CAL_DB_INTERFACE = "tcp://max-exfl017:8020"
WRONG_CAL_DB_INTERFACE = "tcp://max-exfl017:0000"
@pytest.fixture
def _agipd_const_cond():
# AGIPD dark offset metadata
constant = Constants.AGIPD.Offset()
condition = Conditions.Dark.AGIPD(
memory_cells=MEM_CELLS,
bias_voltage=BIAS_VOLTAGE,
acquisition_rate=ACQ_RATE,
gain_setting=GAIN_SETTING,
)
return constant, condition
def test_show_processed_modules():
......@@ -39,22 +72,224 @@ def test_dir_creation_date():
assert str(date) == '2019-12-16 08:52:25.196603'
def _call_get_from_db(
constant,
condition,
karabo_id,
karabo_da,
load_data=True,
cal_db_interface=CAL_DB_INTERFACE,
creation_time=None,
doraise=True,
timeout=10000,
):
data, metadata = get_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=constant,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
meta_only=True,
load_data=load_data,
ntries=1,
doraise=doraise,
timeout=timeout,
)
return data, metadata
def _call_send_to_db(
constant,
condition,
db_module,
data=np.zeros((2, 2, 2)),
cal_db_interface=CAL_DB_INTERFACE,
report_path="",
doraise=True,
timeout=1000,
ntries=1,
):
# TODO: create a known_constant for testing.
constant.data = data
metadata = send_to_db(
karabo_id=AGIPD_KARABO_ID,
db_module=db_module,
constant=constant,
condition=condition,
file_loc="proposal, runs",
report_path=report_path,
cal_db_interface=cal_db_interface,
creation_time=None,
ntries=ntries,
doraise=doraise,
timeout=timeout,
)
return metadata
# TODO add a marker for accessing zmq end_point
@pytest.mark.requires_gpfs
def test_get_from_db_load_data(_agipd_const_cond):
""" Test retrieving calibration constants with get_from_db
with different loading data scenarios.
"""
constant, condition = _agipd_const_cond
# Normal operation and loading data from h5file.
data, md = _call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
)
assert type(data) is np.ndarray
assert isinstance(md, ConstantMetaData)
# karabo_id is given as None.
data, md = _call_get_from_db(
constant=constant, condition=condition,
karabo_id=None, karabo_da="AGIPD00",
)
assert data is None
assert md is None
# Retrieve constant without loading the data.
data, md = _call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
load_data=False,
)
assert data is None
assert isinstance(md, ConstantMetaData)
# TODO add a marker for accessing zmq end_point
@pytest.mark.requires_gpfs
def test_raise_get_from_db(_agipd_const_cond):
""" Test error raised scenarios for get_from_db:"""
constant, condition = _agipd_const_cond
# Wrong address for the calibration database.
with pytest.raises(zmq.error.Again) as excinfo:
_call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
cal_db_interface=WRONG_CAL_DB_INTERFACE,
)
assert str(excinfo.value) == "Resource temporarily unavailable"
# Wrong type for creation_time.
with pytest.raises(ValueError):
_call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
creation_time="WRONG_CREATION_TIME",
)
# No constant file path metadata retrieved.
with patch("iCalibrationDB.ConstantMetaData.retrieve", return_value=""):
with pytest.raises(ValueError):
_call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
)
def test_no_doraise_get_from_db(_agipd_const_cond):
"""get_from_db using wrong cal_db_interface
fails without raising errors, as doraise = False
"""
constant, condition = _agipd_const_cond
data, _ = _call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
cal_db_interface=WRONG_CAL_DB_INTERFACE,
doraise=False,
)
assert data is None
@patch(
'iCalibrationDB.ConstantMetaData.send',
return_value='',
)
def test_send_to_db_success(send, _agipd_const_cond):
"""test sending constants to the database (send_to_db):
Injecting constant as expected.
# TODO: Add a test calibration constant to the test physical module
# to inject without mocking `send` method.
"""
# Use a wrong AGIPD module name as a safeguard,
# to avoid injecting constants in case the mock fails.
constant, condition = _agipd_const_cond
db_module = WRONG_AGIPD_MODULE
metadata = _call_send_to_db(
constant=constant,
condition=condition,
db_module=db_module,
)
assert isinstance(metadata, ConstantMetaData)
@patch(
'iCalibrationDB.ConstantMetaData.send',
return_value='',
)
def test_raise_send_to_db_mocked(send, _agipd_const_cond):
"""Test raised errors while sending constants to the
database (send_to_db):
"""
# Use a wrong AGIPD module name as a safeguard,
# to avoid injecting constants in case the mock fails.
constant, condition = _agipd_const_cond
# report_path has the wrong type.
with pytest.raises(TypeError):
_call_send_to_db(
constant=constant,
condition=condition,
db_module=WRONG_AGIPD_MODULE,
report_path=2,
)
# No constant data to inject.
with pytest.raises(ValueError):
_call_send_to_db(
constant=constant,
condition=condition,
db_module=WRONG_AGIPD_MODULE,
data=None,
)
def test_raise_send_to_db(_agipd_const_cond):
constant, condition = _agipd_const_cond
# wrong calibration database address.
with pytest.raises(zmq.error.Again) as excinfo:
_call_send_to_db(
constant=constant,
condition=condition,
db_module=WRONG_AGIPD_MODULE,
cal_db_interface=WRONG_CAL_DB_INTERFACE,
)
assert str(excinfo.value) == "Resource temporarily unavailable"
def test_get_pdu_from_db(_agipd_const_cond):
constant, condition = _agipd_const_cond
snapshot_at = "2021-05-06 00:20:10.00"
# A karabo_da str returns a list of one element.
......@@ -62,7 +297,7 @@ def test_get_pdu_from_db():
karabo_da="TEST_DET_CAL_DA0",
constant=constant,
condition=condition,
cal_db_interface=CAL_DB_INTERFACE,
snapshot_at=snapshot_at,
timeout=30000)
assert len(pdu_dict) == 1
......@@ -70,11 +305,13 @@ def test_get_pdu_from_db():
# A list of karabo_das to return their PDUs, if available.
pdu_dict = get_pdu_from_db(karabo_id="TEST_DET_CAL_CI-1",
karabo_da=["TEST_DET_CAL_DA0", "TEST_DET_CAL_DA1",
"UNAVAILABLE_DA"],
karabo_da=[
"TEST_DET_CAL_DA0",
"TEST_DET_CAL_DA1",
"UNAVAILABLE_DA"],
constant=constant,
condition=condition,
cal_db_interface=CAL_DB_INTERFACE,
snapshot_at=snapshot_at,
timeout=30000)
......@@ -87,7 +324,7 @@ def test_get_pdu_from_db():
karabo_da="all",
constant=constant,
condition=condition,
cal_db_interface=CAL_DB_INTERFACE,
snapshot_at=snapshot_at,
timeout=30000)
......@@ -96,27 +333,30 @@ def test_get_pdu_from_db():
"CAL_PHYSICAL_DETECTOR_UNIT-2_TEST"]
# TODO add a marker for accessing zmq end_point
@pytest.mark.requires_gpfs
def test_initialize_from_db():
creation_time = datetime.strptime("2020-01-07 13:26:48.00",
"%Y-%m-%d %H:%M:%S.%f")
creation_time = datetime.strptime(
"2020-01-07 13:26:48.00", "%Y-%m-%d %H:%M:%S.%f")
agipd_corr = AgipdCorrections(max_cells=mem_cells,
max_pulses=[0, 500, 1])
agipd_corr = AgipdCorrections(
max_cells=MEM_CELLS,
max_pulses=[0, 500, 1])
agipd_corr.allocate_constants(modules=[0],
constant_shape=(3, mem_cells, 512, 128))
agipd_corr.allocate_constants(
modules=[0],
constant_shape=(3, MEM_CELLS, 512, 128))
dark_const_time_dict = agipd_corr.initialize_from_db(
karabo_id="TEST_DET_CI-2",
karabo_da="TEST_DAQ_DA_01",
cal_db_interface=cal_db_interface,
karabo_id="TEST_DET_CAL_CI-1",
karabo_da="TEST_DET_CAL_DA1",
cal_db_interface=CAL_DB_INTERFACE,
creation_time=creation_time,
memory_cells=mem_cells,
bias_voltage=bias_voltage,
photon_energy=photon_energy,
gain_setting=gain_setting,
acquisition_rate=acq_rate,
memory_cells=MEM_CELLS,
bias_voltage=BIAS_VOLTAGE,
photon_energy=PHOTON_ENERGY,
gain_setting=GAIN_SETTING,
acquisition_rate=ACQ_RATE,
module_idx=0,
only_dark=False,
)
......@@ -129,14 +369,16 @@ def test_initialize_from_db():
}
dark_const_time_dict = agipd_corr.initialize_from_db(
karabo_id="SPB_DET_AGIPD1M-1",
karabo_id=AGIPD_KARABO_ID,
karabo_da="AGIPD00",
cal_db_interface=cal_db_interface,
cal_db_interface=CAL_DB_INTERFACE,
creation_time=creation_time,
memory_cells=mem_cells, bias_voltage=bias_voltage,
photon_energy=photon_energy, gain_setting=gain_setting,
acquisition_rate=acq_rate, module_idx=0,
only_dark=False)
memory_cells=MEM_CELLS, bias_voltage=BIAS_VOLTAGE,
photon_energy=PHOTON_ENERGY, gain_setting=GAIN_SETTING,
acquisition_rate=ACQ_RATE, module_idx=0,
only_dark=False,
)
# A retrieved constant has a value of datetime creation_time
assert isinstance(dark_const_time_dict["Offset"], datetime)
assert list(dark_const_time_dict.keys()) == [
......