Compare revisions: calibration/pycalibration

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (93)
Showing with 546 additions and 305 deletions
......@@ -10,11 +10,12 @@
*.pkl
*.png
*.png
*.secrets.yaml
*.so
*.tar
*.tif
*.tiff
*.tmp
*.so
*/slurm_tmp*
*egg*
./temp
......@@ -34,4 +35,3 @@ slurm_tmp*
src/cal_tools/agipdalgs.c
webservice/*.log
webservice/*sqlite
webservice/webservice.yaml
......@@ -40,13 +40,13 @@ python. This can be activated with ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/ac
A quick setup would be:
0. ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate``
1. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
2. ``pyenv shell 3.8.11`` - load required version of python
3. ``python3 -m venv .venv`` - create the virtual environment
4. ``source .venv/bin/activate`` - activate the virtual environment
5. ``python3 -m pip install --upgrade pip`` - upgrade version of pip
6. ``python3 -m pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
1. ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate``
2. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
3. ``pyenv shell 3.8.11`` - load required version of python
4. ``python3 -m venv .venv`` - create the virtual environment
5. ``source .venv/bin/activate`` - activate the virtual environment
6. ``python3 -m pip install --upgrade pip`` - upgrade version of pip
7. ``python3 -m pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
Copy/paste script:
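A minimal sketch of such a script, assembled from the numbered steps above (the script shipped in the repository may differ):

```bash
# Sketch only: assembled from the setup steps above; the repository's
# actual copy/paste script may differ.
set -euo pipefail

source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate
git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git
cd pycalibration
pyenv shell 3.8.11
python3 -m venv .venv
source .venv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install .  # add -e for an editable development installation
```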
......@@ -71,11 +71,11 @@ will downgrade/upgrade your local packages, which may cause major issues and may
**break your local environment**; it is highly recommended to use the venv
installation method instead.
0. ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate``
1. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
2. ``pyenv shell 3.8.11`` - load required version of python
3. ``pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
4. ``export PATH=$HOME/.local/bin:$PATH`` - make sure that the home directory is in the PATH environment variable
1. ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate``
2. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
3. ``pyenv shell 3.8.11`` - load required version of python
4. ``pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
5. ``export PATH=$HOME/.local/bin:$PATH`` - make sure that the home directory is in the PATH environment variable
Copy/paste script:
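As above, a minimal sketch assembled from the numbered steps (the repository's actual script may differ):

```bash
# Sketch only: assembled from the setup steps above; the repository's
# actual copy/paste script may differ.
set -euo pipefail

source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate
git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git
cd pycalibration
pyenv shell 3.8.11
pip install .  # add -e for an editable development installation
export PATH=$HOME/.local/bin:$PATH
```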
......@@ -103,10 +103,142 @@ venv) activate the virtual environment first, and then run:
This can be useful for Jupyter notebook tools such as https://max-jhub.desy.de/hub/login
Offline Calibration Configuration
*********************************
The offline calibration package is configured with three configuration files:
- `webservice/config/webservice.yaml` - configuration for the web service
- `webservice/config/serve_overview.yaml` - configuration for the overview page
- `src/cal_tools/mdc_config.yaml` - configuration for MDC access by cal tools
These configuration files should not be modified directly, instead you should
create a file `$CONFIG.secrets.yaml` (e.g. `webservice.secrets.yaml`) in the
configuration directory, and then add any modifications, such as secrets, to
this file.
Alternatively, configurations are also searched for in
`~/.config/pycalibration/$MODULE/$CONFIG.yaml` (e.g.
`~/.config/pycalibration/webservice/serve_overview.yaml`). This is a useful
place to store configurations such as secrets: they remain in place even if
you delete the pycalibration directory, and if you have multiple
`pycalibration` repos checked out you no longer need to copy the
configurations into each one.
Finally, you can use environment variables to override the configuration without
modifying any files, which is useful for one-off changes or if you are running
tests in a CI environment. The environment variables should be prefixed with:
- `webservice/config/webservice.yaml` - `CAL_WEBSERVICE`
- `webservice/config/serve_overview.yaml` - `CAL_SERVE_OVERVIEW`
- `src/cal_tools/mdc_config.yaml` - `CAL_CAL_TOOLS`
This prefix is followed by an underscore and the configuration key you wish to
change. Nested keys are accessed with two underscores, e.g.
`CAL_WEBSERVICE_CONFIG_REPO__URL` would modify the `config-repo: url:` value.
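For example, a one-off override from the shell might look like this (hypothetical values; the key names follow the prefix rules above):

```bash
# Hypothetical values, shown only to illustrate the naming scheme.
export CAL_WEBSERVICE_CONFIG_REPO__URL="https://USERNAME:TOKEN@git.xfel.eu/gitlab/detectors/calibration_configurations.git"
export CAL_WEBSERVICE_WEB_SERVICE__PORT=5556
```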
Note that the order of priority is:
- default configuration - e.g. `webservice/config/webservice.yaml`
- local configuration - e.g. `webservice/config/webservice.secrets.yaml`
- user configuration - e.g. `~/.config/pycalibration/webservice/webservice.yaml`
- environment variables - e.g. `export CAL_WEBSERVICE_*=...`
Examples
========
For example, `webservice/config/webservice.yaml` has:
```yaml
config-repo:
url: "@note add this to secrets file"
local-path: "@format {env[HOME]}/calibration_config"
...
metadata-client:
user-id: "@note add this to secrets file"
user-secret: "@note add this to secrets file"
user-email: "@note add this to secrets file"
```
So you would create a file `webservice/config/webservice.secrets.yaml`:
```yaml
config-repo:
url: "https://USERNAME:TOKEN@git.xfel.eu/gitlab/detectors/calibration_configurations.git"
metadata-client:
user-id: "id..."
user-secret: "secret..."
user-email: "calibration@example.com"
```
Alternatively, this file could be placed at `~/.config/pycalibration/webservice/webservice.yaml`
Checking Configurations
=======================
Having multiple nested configurations can get a bit confusing, so `dynaconf`
includes a command to help view what a configuration will be resolved to. Once
you have activated the python environment pycalibration is installed in, you
can run the command `dynaconf -i webservice.config.webservice list` to list the
current configuration values:
```
> dynaconf -i webservice.config.webservice list
Working in main environment
WEBSERVICE_DIR<PosixPath> PosixPath('/home/roscar/work/git.xfel.eu/detectors/pycalibration/webservice')
CONFIG-REPO<dict> {'local-path': '/home/roscar/calibration_config',
'url': 'https://haufs:AAABBBCCCDDDEEEFFF@git.xfel.eu/gitlab/detectors/calibration_configurations.git'}
WEB-SERVICE<dict> {'allowed-ips': '131.169.4.197, 131.169.212.226',
'bind-to': 'tcp://*',
'job-db': '/home/roscar/work/git.xfel.eu/detectors/pycalibration/webservice/webservice_jobs.sqlite',
'job-timeout': 3600,
'job-update-interval': 60,
'port': 5556}
METADATA-CLIENT<dict> {'auth-url': 'https://in.xfel.eu/test_metadata/oauth/authorize',
'base-api-url': 'https://in.xfel.eu/metadata/api/',
'metadata-web-app-url': 'https://in.xfel.eu/test_metadata',
'refresh-url': 'https://in.xfel.eu/test_metadata/oauth/token',
'scope': '',
'token-url': 'https://in.xfel.eu/test_metadata/oauth/token',
'user-email': 'calibration@example.com',
'user-id': 'AAABBBCCCDDDEEEFFF',
'user-secret': 'AAABBBCCCDDDEEEFFF'}
KAFKA<dict> {'brokers': ['it-kafka-broker01.desy.de',
'it-kafka-broker02.desy.de',
'it-kafka-broker03.desy.de'],
'topic': 'xfel-test-offline-cal'}
CORRECT<dict> {'cmd': 'python -m xfel_calibrate.calibrate {detector} CORRECT '
'--slurm-scheduling {sched_prio} --slurm-mem 750 --request-time '
'{request_time} --slurm-name '
'{action}_{instrument}_{detector}_{cycle}_p{proposal}_{runs} '
'--report-to '
'/gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/usr/Reports/{runs}/{det_instance}_{action}_{proposal}_{runs}_{time_stamp} '
'--cal-db-timeout 300000 --cal-db-interface '
'tcp://max-exfl016:8015#8044',
'in-folder': '/gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/raw',
'out-folder': '/gpfs/exfel/d/proc/{instrument}/{cycle}/p{proposal}/{run}',
'sched-prio': 80}
DARK<dict> {'cmd': 'python -m xfel_calibrate.calibrate {detector} DARK --concurrency-par '
'karabo_da --slurm-scheduling {sched_prio} --request-time '
'{request_time} --slurm-name '
'{action}_{instrument}_{detector}_{cycle}_p{proposal}_{runs} '
'--report-to '
'/gpfs/exfel/d/cal/caldb_store/xfel/reports/{instrument}/{det_instance}/{action}/{action}_{proposal}_{runs}_{time_stamp} '
'--cal-db-interface tcp://max-exfl016:8015#8044 --db-output',
'in-folder': '/gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/raw',
'out-folder': '/gpfs/exfel/u/usr/{instrument}/{cycle}/p{proposal}/dark/runs_{runs}',
'sched-prio': 10}
```
And here you can see that `metadata-client: user-id: ` contains the ID now
instead of the note "add this to secrets file", so the substitution has worked
correctly.
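The same resolved values can also be read programmatically from the dynaconf settings instance. A hypothetical sketch, assuming the instance path used with `dynaconf -i` above:

```python
# Hypothetical sketch: assumes the settings instance passed to `dynaconf -i`
# above; dynaconf merges defaults, secrets files, user configuration and
# CAL_WEBSERVICE_* environment variables before returning a value.
from webservice.config import webservice as config

print(config["metadata-client"]["user-id"])
```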
Contributing
************
Guidelines
==========
......
#!/bin/bash
set -euo pipefail
# set paths to use
nb_path=$1
python_path=$2
......@@ -7,19 +9,15 @@ ipcluster_profile=$3
notebook=$4
detector=$5
caltype=$6
finalize=$7
cluster_cores=$8
cal_python_path=$9
cluster_cores=$7
echo "Running with the following parameters:"
echo "Notebook path: $nb_path"
echo "Python path: $python_path"
echo "Calibration Python: $cal_python_path"
echo "IP-Cluster profile: $ipcluster_profile"
echo "notebook: $notebook"
echo "detector: $detector"
echo "caltype: $caltype"
echo "finalize: $finalize"
echo "cluster_cores: $cluster_cores"
echo "job ID: $SLURM_JOB_ID"
......@@ -28,7 +26,6 @@ export CAL_NOTEBOOK_NAME="$notebook"
# set up environment
source /etc/profile.d/modules.sh
module load anaconda/3
module load texlive/2019
# make sure we use agg backend
export MPLBACKEND=AGG
......@@ -47,7 +44,6 @@ fi
echo "Running notebook"
${python_path} -m princess ${nb_path} --save
${cal_python_path} -m nbconvert --to rst --TemplateExporter.exclude_input=True ${nb_path}
# stop the cluster if requested
if [ "${ipcluster_profile}" != "NO_CLUSTER" ]
......@@ -57,8 +53,3 @@ then
echo "Removing cluster profile from: $profile_path"
rm -rf $profile_path
fi
if [ -n "${finalize}" ]
then
${cal_python_path} ${finalize}
fi
#!/bin/bash
set -euo pipefail
# set paths to use
python_path=$1
temp_dir=$2
finalize_script=$3
echo "Running with the following parameters:"
echo "Python path: $python_path"
echo "Correction temp dir: $temp_dir"
echo "finalize script: $finalize_script"
echo "job ID: $SLURM_JOB_ID"
# set up environment
source /etc/profile.d/modules.sh
module load texlive/2019
# make sure we use agg backend
export MPLBACKEND=AGG
# Ensure Python uses UTF-8 for files by default
export LANG=en_US.UTF-8
shopt -s failglob # Fail fast if there are no notebooks found
echo "Converting notebooks"
${python_path} -m nbconvert --to rst --TemplateExporter.exclude_input=True "$temp_dir"/*.ipynb
shopt -u failglob # Restore default glob behaviour
${python_path} "$finalize_script"
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202031/p900174/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/hibef_agipd2" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 155 # runs to process, required
karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device
karabo_da_control = 'AGIPD500K2G00' # karabo DA for control information
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 500, 1] # range list [st, end, step] of maximum pulse indices within a train. 3 allowed maximum list input elements.
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
cell_id_preview = 1 # cell Id used for preview in single-shot plots
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for the fraction of empty pixels to consider a module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values in ADU for a pixel to be considered a dark pixel
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask a complete ADC
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
# Correction Booleans
only_offset = False # Apply only offset correction. If False, offset correction is applied by default; if True, only offset correction is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift negative medium gain pixels to 0 after offset correction
corr_asic_diag = False # if set, diagonal drop-offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask an entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of Slurm jobs to split correction work into
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
import itertools
import os
import math
import multiprocessing
import re
import traceback
import warnings
from datetime import timedelta
from pathlib import Path
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools
import seaborn as sns
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
get_acq_rate,
get_gain_mode,
get_integration_time,
get_gain_setting,
get_num_cells,
)
from cal_tools.ana_tools import get_range
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["zero_nans"] = zero_nans
    corr_bools["zero_orange"] = zero_orange
    corr_bools["mask_noisy_adc"] = mask_noisy_adc
    corr_bools["force_hg_if_below"] = force_hg_if_below
    corr_bools["force_mg_if_below"] = force_mg_if_below
    corr_bools["common_mode"] = common_mode
    corr_bools["melt_snow"] = melt_snow
    corr_bools["mask_zero_std"] = mask_zero_std
    corr_bools["low_medium_gap"] = low_medium_gap
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences[0] == -1:
    sequences = None
control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
print(f'Path to control file {control_fn}')
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules:", ', '.join(cal_tools.tools.module_index_to_qm(x) for x in modules))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Display Information about the selected pulses indices for correction.
pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses
try:
    if len(pulses_lst) > 1:
        print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
              .format(len(pulses_lst), pulses_lst[0], pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
                      pulses_lst[1] - pulses_lst[0]))
    else:
        print(f"one pulse is selected: a pulse of idx {pulses_lst[0]}")
except Exception as e:
    raise ValueError(f"max_pulses input Error: {e}")
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = cal_tools.tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i, f))
        i += 1
        ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
filename = file_list[0]
channel = int(re.findall(r".*-AGIPD([0-9]+)-.*", filename)[0])
# Evaluate number of memory cells
mem_cells = get_num_cells(filename, karabo_id, channel)
if mem_cells is None:
    raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
max_cells = mem_cells if max_cells == 0 else max_cells
fast_paths = (filename, karabo_id, channel)
slow_paths = (control_fn, karabo_id_control)
# Evaluate acquisition rate
if acq_rate == 0:
    acq_rate = get_acq_rate((filename, karabo_id, channel))
    acq_rate = get_acq_rate(fast_paths, slow_paths)
print(f"Maximum memory cells to calibrate: {max_cells}")
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
    creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)
    offset = parser.parse(creation_date_offset)
    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
    creation_time += delta
# Evaluate gain setting
if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(str(control_fn), h5path_ctrl)
        except Exception as e:
            print(f'ERROR: while reading gain setting from: \n{control_fn}')
            print(e)
            print("Set gain setting to 0")
            gain_setting = 0
# Evaluate gain mode (operation mode)
gain_mode = get_gain_mode(control_fn, h5path_ctrl)
# Evaluate integration time
if integration_time < 0:
    integration_time = get_integration_time(control_fn, h5path_ctrl)
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: {photon_energy}")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
    for to_disable in disable_for_fixed_gain:
        if corr_bools.get(to_disable, False):
            print(f"Warning: {to_disable} correction was requested, but does not apply to fixed gain mode")
            corr_bools[to_disable] = False
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
max_cells,
max_pulses,
h5_data_path=h5path,
h5_index_path=h5path_idx,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
```
%% Cell type:code id: tags:
``` python
module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = cal_tools.tools.CalibrationMetadata(out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def retrieve_constants(mod):
    """
    Retrieve calibration constants and load them to shared memory

    Metadata for constants is taken from yml file or retrieved from the DB
    """
    err = ""
    k_da = module_index_to_karabo_da[mod]
    try:
        # check if there is a yaml file in out_folder that has the device constants.
        if k_da in const_yaml:
            when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
        else:
            # TODO: replace with proper retrieval (as done in pre-correction)
            when = agipd_corr.initialize_from_db(
                karabo_id=karabo_id,
                karabo_da=k_da,
                cal_db_interface=cal_db_interface,
                creation_time=creation_time,
                memory_cells=mem_cells_db,
                bias_voltage=bias_voltage,
                photon_energy=photon_energy,
                gain_setting=gain_setting,
                acquisition_rate=acq_rate,
                integration_time=integration_time,
                module_idx=mod,
                only_dark=False,
            )
    except Exception as e:
        err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
        when = None
    return err, mod, when, k_da

ts = perf_counter()
with multiprocessing.Pool(processes=len(modules)) as pool:
    const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = max_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements"""
    start = 0
    while start < len(l):
        yield l[start:start + batch_size]
        start += batch_size
```
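%% Cell type:markdown id: tags:

For illustration, `batches` simply slices a list into consecutive groups of at most `batch_size` elements. A hypothetical quick check (not part of the original notebook):

%% Cell type:code id: tags:

``` python
# Hypothetical sanity check of batches() above (not in the original notebook):
# five items in batches of two -> [[0, 1], [2, 3], [4]]
assert list(batches(list(range(5)), 2)) == [[0, 1], [2, 3], [4]]
```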
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
    """Break up the loaded data into chunks of up to chunk_size

    Yields (file data slot, start index, stop index)
    """
    for i_proc, n_img in enumerate(img_counts):
        n_chunks = math.ceil(n_img / chunk_size)
        for i in range(n_chunks):
            yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
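%% Cell type:markdown id: tags:

`imagewise_chunks` splits each file slot's images into nearly equal chunks of at most `chunk_size` images. A hypothetical illustration (not part of the original notebook), assuming the default `chunk_size = 1000`:

%% Cell type:code id: tags:

``` python
# Hypothetical illustration (not in the original notebook): 2500 images in
# slot 0 with chunk_size = 1000 give ceil(2500/1000) = 3 chunks:
# (0, 0, 833), (0, 833, 1666), (0, 1666, 2500)
print(list(imagewise_chunks([2500])))
```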
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool() as pool:
    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files")
        step_timer.start()
        img_counts = pool.starmap(agipd_corr.read_file, zip(range(len(file_batch)), file_batch,
                                                            [not common_mode]*len(file_batch)))
        step_timer.done_step('Loading data from files')

        if mask_zero_std:
            # Evaluate zero-data-std mask
            pool.starmap(agipd_corr.mask_zero_std, itertools.product(
                range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
            ))
            step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # Perform cross-file correction parallel over asics
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                range(len(file_batch)), range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

            img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))
            step_timer.done_step("Applying selected pulses after common mode correction")

        # Perform image-wise correction
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Gain corrections")

        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# if the yml file contains "retrieved-constants", a preceding
# notebook was already processed and the report is generated from it.
fst_print = True
timestamps = {}

for i, (error, modno, when, k_da) in enumerate(const_out):
    qm = cal_tools.tools.module_index_to_qm(modno)
    # expose errors while applying correction
    if error:
        print("Error: {}".format(error))

    if k_da not in const_yaml:
        if fst_print:
            print("Constants are retrieved with creation time: ")
            fst_print = False

        module_timestamps = {}

        # Print the timestamps unless the correction crashed
        if not error:
            print(f"{qm}:")
            for key, item in when.items():
                if hasattr(item, 'strftime'):
                    item = item.strftime('%y-%m-%d %H:%M')
                when[key] = item
                print('{:.<12s}'.format(key), item)

        # Store a few timestamps if they exist
        # Add NA to keep the array structure
        for key in ['Offset', 'SlopesPC', 'SlopesFF']:
            if when and key in when and when[key]:
                module_timestamps[key] = when[key]
            else:
                if error is not None:
                    module_timestamps[key] = "Err"
                else:
                    module_timestamps[key] = "NA"

        timestamps[qm] = module_timestamps

seq = sequences[0] if sequences else 0

if timestamps:
    with open(f"{out_folder}/retrieved_constants_s{seq}.yml", "w") as fd:
        yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T

    # Plot the surface.
    ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")


def do_2d_plot(data, edges, y_axis, x_axis):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),
              np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
                   norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(run_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
    """Load a single train for all modules

    :param run_folder: Path to folder with data
    :param source: Data source to be loaded
    :param include: Inset of file name to be considered
    :param detector_id: The karabo id of the detector to get data for
    :param tid: Train Id to be loaded. First train is considered if None is given
    :param path: Path to find image data inside h5 file
    """
    run_data = RunDirectory(run_folder, include)
    if tid is not None:
        tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)
    else:
        tid, data = next(iter(run_data.select(f'{detector_id}/DET/*', source).trains(require_all=True)))

    return tid, stack_detector_data(train=data, data=source, fillvalue=fillvalue, modules=modules)
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows signal vs. digitized gain.
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
display(Markdown(f'Mean over images of the RAW data\n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[:, 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {np.max(cellId[cell_id_preview])} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_id_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_id_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
display(Markdown(f'A single shot image from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 5, -50)
nbins = np.int((vmax + 50) / 2)
h = ax.hist(corrected[cell_id_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across one train\n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
    vmax = 50000
nbins = np.int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
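%% Cell type:markdown id: tags:

Since each entry is a bit field, a pixel can carry several bad-pixel classes at once, so an individual class is tested with a bitwise AND. A hypothetical example (not part of the original notebook), using the `mask` array loaded above:

%% Cell type:code id: tags:

``` python
# Hypothetical example (not in the original notebook): count pixels in this
# train carrying the NO_DARK_DATA flag, testing the bit with a bitwise AND.
no_dark = (mask & BadPixels.NO_DARK_DATA.value) != 0
print(f"Pixels flagged NO_DARK_DATA: {no_dark.sum()}")
```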
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
%% Cell type:markdown id: tags:
# AGIPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # runs to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
calfile = "" # path to calibration file. Leave empty if all data should come from DB
nodb = False # if set only file-based constants will be used
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
max_cells_db_dark = 0 # set to a value different than 0 to use this value for dark data DB queries
max_cells_db = 0 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction Booleans
only_offset = False # Apply only offset correction. If False, offset correction is applied by default; if True, only offset correction is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
```
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any corrections if only_offset is requested
if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_hmatch"] = blc_hmatch
```
%% Cell type:code id: tags:
``` python
from typing import List, Tuple
import matplotlib
import numpy as np
matplotlib.use("agg")
import multiprocessing
from datetime import timedelta
from pathlib import Path
import matplotlib.pyplot as plt
from cal_tools import agipdlib, tools
from dateutil import parser
from iCalibrationDB import Conditions, Constants, Detectors
```
%% Cell type:code id: tags:
``` python
# slopes_ff_from_files left as str for now
in_folder = Path(in_folder)
out_folder = Path(out_folder)
metadata = tools.CalibrationMetadata(out_folder)
```
%% Cell type:code id: tags:
``` python
max_cells = mem_cells
creation_time = None
if use_dir_creation_date:
    creation_time = tools.get_dir_creation_date(str(in_folder), run)
    offset = parser.parse(creation_date_offset)
    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
    creation_time += delta
print(f"Using {creation_time} as creation time")

if sequences[0] == -1:
    sequences = None
print(f"Outputting to {out_folder}")
out_folder.mkdir(parents=True, exist_ok=True)
melt_snow = False if corr_bools["only_offset"] else agipdlib.SnowResolution.NONE
```
%% Cell type:code id: tags:
``` python
control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = agipdlib.get_gain_setting(str(control_fn), h5path_ctrl)
        except Exception as e:
            print(f'ERROR: while reading gain setting from: \n{control_fn}')
            print(e)
            print("Set gain setting to 0")
            gain_setting = 0
# Evaluate gain mode (operation mode)
gain_mode = agipdlib.get_gain_mode(control_fn, h5path_ctrl)
# Evaluate integration time
if integration_time < 0:
    integration_time = agipblib.get_integration_time(control_fn, h5path_ctrl)
    integration_time = agipdlib.get_integration_time(control_fn, h5path_ctrl)
print(f"Gain setting: {gain_setting}")
print(f"Gain mode: {gain_mode.name}")
print(f"Detector in use is {karabo_id}")
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
# Evaluate detector instance for mapping
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:markdown id: tags:
## Retrieve Constants ##
%% Cell type:code id: tags:
``` python
def retrieve_constants(
    qm_files: List[Path], qm: str, karabo_da: str, idx: int
) -> Tuple[str, str, float, float, str, dict]:
    """
    Retrieve constants for a module.

    :return:
        qm: module virtual name, e.g. Q1M1.
        karabo_da: karabo data aggregator.
        acq_rate: acquisition rate parameter.
        max_cells: number of memory cells.
        err: string of faced errors.
        mdata_dict: (DICT) dictionary with the metadata for the retrieved constants.
    """
    err = None
    if max_cells != 0:
        # either use overriding notebook parameter
        local_max_cells = max_cells
    else:
        # or look around in sequence files
        for f in qm_files:
            local_max_cells = agipdlib.get_num_cells(f, karabo_id, idx)
            if local_max_cells is not None:
                break
    # maybe we never found this in a sequence file...
    if local_max_cells is None:
        raise ValueError(f"No raw images found for {qm} for all sequences")

    if acq_rate == 0:
        local_acq_rate = agipdlib.get_acq_rate(fast_paths=(f, karabo_id, idx))
    else:
        local_acq_rate = acq_rate

    # avoid retrieving constants, if requested.
    if nodb_with_dark:
        return

    const_dict = agipdlib.assemble_constant_dict(
        corr_bools,
        pc_bools,
        local_max_cells,
        bias_voltage,
        gain_setting,
        local_acq_rate,
        photon_energy,
        gain_mode=gain_mode,
        beam_energy=None,
        only_dark=only_dark,
        integration_time=integration_time
    )

    # Retrieve multiple constants through an input dictionary
    # to return a dict of useful metadata.
    mdata_dict = dict()
    mdata_dict["constants"] = dict()
    mdata_dict["physical-detector-unit"] = None  # initialization

    for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items():
        if gain_mode and const_name in ("ThresholdsDark",):
            continue
        # saving metadata in a dict
        const_mdata = dict()
        mdata_dict["constants"][const_name] = const_mdata

        if slopes_ff_from_files and const_name in ["SlopesFF", "BadPixelsFF"]:
            const_mdata["file-path"] = f"{slopes_ff_from_files}/slopesff_bpmask_module_{qm}.h5"
            const_mdata["creation-time"] = "00:00:00"
            continue

        if gain_mode and const_name in ("BadPixelsPC", "SlopesPC", "BadPixelsFF", "SlopesFF"):
            param_copy = cond_param.copy()
            del param_copy["gain_mode"]
            condition = getattr(Conditions, cond_type).AGIPD(**param_copy)
        else:
            condition = getattr(Conditions, cond_type).AGIPD(**cond_param)

        _, mdata = tools.get_from_db(
            karabo_id,
            karabo_da,
            getattr(Constants.AGIPD, const_name)(),
            condition,
            getattr(np, const_init_fun)(const_shape),
            cal_db_interface,
            creation_time,
            meta_only=True,
            verbosity=0,
        )
        mdata_const = mdata.calibration_constant_version
        # check if constant was successfully retrieved.
        if mdata.comm_db_success:
            const_mdata["file-path"] = (
                f"{mdata_const.hdf5path}" f"{mdata_const.filename}"
            )
            const_mdata["creation-time"] = f"{mdata_const.begin_at}"
            mdata_dict["physical-detector-unit"] = mdata_const.device_name
        else:
            const_mdata["file-path"] = const_dict[const_name][:2]
            const_mdata["creation-time"] = None

    return qm, mdata_dict, karabo_da, acq_rate, local_max_cells, err
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, _, _, _ = tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
pc_bools = [corr_bools.get("rel_gain"),
corr_bools.get("adjust_mg_baseline"),
corr_bools.get('blc_noise'),
corr_bools.get('blc_hmatch'),
corr_bools.get('blc_stripes'),
melt_snow]
inp = []
only_dark = False
nodb_with_dark = False
if not nodb:
    only_dark = (calfile != "")
if calfile != "" and not corr_bools["only_offset"]:
    nodb_with_dark = nodb

# A dict to connect virtual device
# to actual device name.
for module_index, k_da in zip(modules, karabo_da):
    qm = tools.module_index_to_qm(module_index)
    if qm in mapped_files and not mapped_files[qm].empty():
        # TODO: make map_modules_from_folder just return list(s)
        qm_files = [Path(mapped_files[qm].get()) for _ in range(mapped_files[qm].qsize())]
    else:
        continue

    inp.append((qm_files, qm, k_da, module_index))
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=nmods) as pool:
    results = pool.starmap(retrieve_constants, inp)
```
%% Cell type:code id: tags:
``` python
mod_dev = dict()
mdata_dict = dict()
for qm, md_dict, karabo_da, acq_rate, max_cells, err in results:
    mod_dev[karabo_da] = {"mod": qm, "err": err}
    if err:
        print(f"Error for module {qm}: {err}")
    mdata_dict[karabo_da] = md_dict

# check if it is requested not to retrieve any constants from the database
if nodb_with_dark:
    print("No constants were retrieved as calibrated files will be used.")
else:
    metadata.update({"retrieved-constants": mdata_dict})
    print("\nRetrieved constants for modules:",
          ', '.join([tools.module_index_to_qm(x) for x in modules]))
    print("Operating conditions are:")
    print(f"• Bias voltage: {bias_voltage}")
    print(f"• Memory cells: {max_cells}")
    print(f"• Acquisition rate: {acq_rate}")
    print(f"• Gain mode: {gain_mode.name}")
    print(f"• Gain setting: {gain_setting}")
    print(f"• Integration time: {integration_time}")
    print(f"• Photon Energy: {photon_energy}")
    print("Constant metadata is saved under \"retrieved-constants\" in calibration_metadata.yml\n")
```
%% Cell type:code id: tags:
``` python
print("Constants are retrieved with creation time:")
timestamps = {}
for k_da, dinfo in mod_dev.items():
    module_timestamps = {}
    module_name = dinfo["mod"]
    print(f"{module_name}:")
    if k_da in mdata_dict:
        for cname, mdata in mdata_dict[k_da]["constants"].items():
            if hasattr(mdata["creation-time"], 'strftime'):
                mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
            print(f'{cname:.<12s}', mdata["creation-time"])

    for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
        if k_da not in mdata_dict or dinfo["err"]:
            module_timestamps[cname] = "Err"
        else:
            if cname in mdata_dict[k_da]["constants"]:
                module_timestamps[cname] = mdata_dict[k_da]["constants"][cname]["creation-time"]
            else:
                module_timestamps[cname] = "NA"
    timestamps[module_name] = module_timestamps
time_summary = metadata.setdefault("retrieved-constants", {}).setdefault("time-summary", {})
time_summary["SAll"] = timestamps
metadata.save()
```
......
%% Cell type:markdown id: tags:
# AGIPD Characterize Dark Images #
Author: S. Hauf, Version: 0.1
The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and thresholding. All four types of constants are evaluated per pixel and per memory cell. Data for the detector's three gain stages needs to be present, recorded in separate runs.
The evaluated calibration constants are stored locally and injected into the calibration database.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
out_folder = "" # path to output to, required
sequences = [-1] # sequence files to evaluate.
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 9985 # run number in which high gain data was recorded, required
run_med = 9984 # run number in which medium gain data was recorded, required
run_low = 9983 # run number in which low gain data was recorded, required
operation_mode = "ADAPTIVE_GAIN" # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")
karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '
karabo_da_control = "AGIPD500K2G00" # karabo DA for control infromation
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # detector bias voltage
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
integration_time = -1 # integration time, negative values for auto-detection.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
rawversion = 2 # RAW file format version
thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
thresholds_offset_hard = [0, 0] # For setting the same offset threshold for the 3 gains. Kept for backwards compatibility. Default [0, 0] falls back to the following parameters.
thresholds_offset_hard_hg = [3000, 7000] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_mg = [6000, 10000] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_lg = [6000, 10000] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_hg_fixed = [3500, 6500] # Same as thresholds_offset_hard_hg, but for fixed gain operation
thresholds_offset_hard_mg_fixed = [3500, 6500] # Same as thresholds_offset_hard_mg, but for fixed gain operation
thresholds_offset_hard_lg_fixed = [3500, 6500] # Same as thresholds_offset_hard_lg, but for fixed gain operation
thresholds_noise_sigma = 5. # noise sigma thresholds for noise deduced bad pixels
thresholds_noise_hard = [0, 0] # For setting the same noise threshold for the 3 gains. Kept for backwards compatibility. Default [0, 0] falls back to the following parameters.
thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_gain_sigma = 5. # Gain separation sigma threshold
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells
```
%% Cell type:code id: tags:
``` python
import itertools
import multiprocessing
import os
from collections import OrderedDict
from datetime import timedelta
from typing import Tuple
import dateutil.parser
import h5py
import matplotlib
import numpy as np
import pasha as psh
import tabulate
import yaml
matplotlib.use('agg')
import iCalibrationDB
import matplotlib.pyplot as plt
from cal_tools.agipdlib import (
get_acq_rate,
get_bias_voltage,
get_gain_mode,
get_gain_setting,
get_integration_time,
get_num_cells,
)
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
from IPython.display import Latex, Markdown, display
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# insert control device if format string (does nothing otherwise)
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = run_high
offset_runs["med"] = run_med
offset_runs["low"] = run_low
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
if sequences == [-1]:
sequences = None
control_names = [f'{in_folder}/r{r:04d}/RAW-R{r:04d}-{karabo_da_control}-S00000.h5'
for r in (run_high, run_med, run_low)]
if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")
run_gain_modes = [get_gain_mode(fn, h5path_ctrl) for fn in control_names]
if all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in run_gain_modes):
fixed_gain_mode = False
if operation_mode == "FIXED_GAIN":
print("WARNING: operation_mode parameter is FIXED_GAIN, slow data indicates adaptive gain")
elif run_gain_modes == [AgipdGainMode.FIXED_HIGH_GAIN, AgipdGainMode.FIXED_MEDIUM_GAIN, AgipdGainMode.FIXED_LOW_GAIN]:
if operation_mode == "ADAPTIVE_GAIN":
print("WARNING: operation_mode parameter ix ADAPTIVE_GAIN, slow data indicates fixed gain")
fixed_gain_mode = True
else:
print(f'Something is clearly wrong; slow data indicates gain modes {run_gain_modes}')
if integration_time < 0:
integration_times = [get_integration_time(fn, h5path_ctrl) for fn in control_names]
if len(set(integration_times)) > 1:
print('WARNING: integration time is not constant across the specified dark runs')
integration_time = integration_times[0]
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
runs = [run_high, run_med, run_low]
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
# extract gain setting and validate that all runs have the same setting
gsettings = []
for r in runs:
control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, r, r,
karabo_da_control)
gsettings.append(get_gain_setting(control_fname, h5path_ctrl))
if not all(g == gsettings[0] for g in gsettings):
raise ValueError(f"Different gain settings for the 3 input runs {gsettings}")
gain_setting = gsettings[0]
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(f'Error: {e}')
if "component not found" in str(e):
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
```
%% Cell type:code id: tags:
``` python
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
if bias_voltage == 0:
# Read the bias voltage from files, if recorded.
# If not available, make use of the historical voltage the detector is running at
bias_voltage = get_bias_voltage(control_names[0], karabo_id_control)
bias_voltage = bias_voltage if bias_voltage is not None else 300
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}/{max_cells}")
print("Runs: {}".format([v for v in offset_runs.values()]))
print(f"Sequences: {sequences if sequences else 'All'}")
print(f"Interlaced mode: {interlaced}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
print(f"Gain setting: {gain_setting}")
print(f"Integration time: {integration_time}")
print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
```
%% Cell type:code id: tags:
``` python
if thresholds_offset_hard != [0, 0]:
# if set, this will override the individual parameters
thresholds_offset_hard = [thresholds_offset_hard] * 3
elif fixed_gain_mode:
thresholds_offset_hard = [
thresholds_offset_hard_hg_fixed,
thresholds_offset_hard_mg_fixed,
thresholds_offset_hard_lg_fixed,
]
else:
thresholds_offset_hard = [
thresholds_offset_hard_hg,
thresholds_offset_hard_mg,
thresholds_offset_hard_lg,
]
print("Will use the following hard offset thresholds")
for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
print(f"- {name} gain: {value}")
if thresholds_noise_hard != [0, 0]:
thresholds_noise_hard = [thresholds_noise_hard] * 3
else:
thresholds_noise_hard = [
thresholds_noise_hard_hg,
thresholds_noise_hard_mg,
thresholds_noise_hard_lg,
]
```
%% Cell type:markdown id: tags:
The following lines create a queue of files which will then be processed module-parallel, distinguishing between the different gains.
%% Cell type:code id: tags:
``` python
# set everything up filewise
os.makedirs(out_folder, exist_ok=True)
gain_mapped_files, total_files, total_file_size = map_gain_stages(
in_folder, offset_runs, path_template, karabo_da, sequences
)
print(f"Will process a total of {total_files} files ({total_file_size:.02f} GB).")
inp = []
for gain_index, (gain, qm_file_map) in enumerate(gain_mapped_files.items()):
gain_input = []
for module_index in modules:
qm = module_index_to_qm(module_index)
if qm not in qm_file_map:
print(f"Did not find files for {qm}")
continue
file_queue = qm_file_map[qm]
while not file_queue.empty():
filename = file_queue.get()
# TODO: remove after using EXtra-data to read files
# and skip empty trains.
with h5py.File(filename, "r") as fin:
if fin[h5path.format(module_index)+"/trainId"].shape[0] != 0:
print(f"Process {filename} for {qm}")
gain_input.append((filename, module_index, gain_index))
else:
print(f"Do not process {filename} because it is empty.")
if not gain_input:
raise ValueError(
"No images to process for run: "
f"{[v for v in offset_runs.values()][gain_index]}"
)
inp += gain_input
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.
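In formula form, for the stack of dark frames $d_i$ recorded at a given gain, per pixel $(x,y)$ and memory cell $c$ (and analogously the gain-bit medians from the gain array):

$$\mathrm{Offset}(x,y,c) = \mathrm{median}_i\, d_i(x,y,c) \qquad \mathrm{Noise}(x,y,c) = \mathrm{std}_i\, d_i(x,y,c)$$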
%% Cell type:code id: tags:
``` python
# min() only relevant if running on multiple modules (i.e. within notebook)
parallel_num_procs = min(12, total_files)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")
def characterize_module(
fast_data_filename: str, channel: int, gain_index: int
) -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:
if max_cells == 0:
num_cells = get_num_cells(fast_data_filename, karabo_id, channel)
else:
num_cells = max_cells
if acq_rate == 0.:
slow_paths = control_names[gain_index], karabo_id_control
fast_paths = fast_data_filename, karabo_id, channel
local_acq_rate = get_acq_rate(fast_paths, slow_paths)
else:
local_acq_rate = acq_rate
local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
local_thresholds_noise_hard = thresholds_noise_hard[gain_index]
h5path_f = h5path.format(channel)
h5path_idx_f = h5path_idx.format(channel)
with h5py.File(fast_data_filename, "r") as infile:
if rawversion == 2:
count = np.squeeze(infile[f"{h5path_idx_f}/count"])
first = np.squeeze(infile[f"{h5path_idx_f}/first"])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile[f"{h5path_idx_f}/status"])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile[f"{h5path_idx_f}/last"])
first = np.squeeze(infile[f"{h5path_idx_f}/first"])
last_index = int(last[status != 0][-1]) + 1
first_index = int(first[status != 0][0])
im = np.array(infile[f"{h5path_f}/data"][first_index:last_index,...])
cell_ids = np.squeeze(infile[f"{h5path_f}/cellId"][first_index:last_index,...])
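# Separate analog signal and gain information: in interlaced data (taken
# before Dec. 2017) they alternate along the frame axis; otherwise the
# gain data sits in the second dimension of each frame.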
if interlaced:
if not fixed_gain_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
cell_ids = cell_ids[::2]
else:
if not fixed_gain_mode:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.transpose(im)
if not fixed_gain_mode:
ga = np.transpose(ga)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], num_cells), dtype=np.float64)
noise = context.alloc(like=offset)
if fixed_gain_mode:
gains = None
gains_std = None
else:
gains = context.alloc(like=offset)
gains_std = context.alloc(like=offset)
def process_cell(worker_id, array_index, cell_number):
cell_slice_index = (cell_ids == cell_number)
im_slice = im[..., cell_slice_index]
offset[..., cell_number] = np.median(im_slice, axis=2)
noise[..., cell_number] = np.std(im_slice, axis=2)
if not fixed_gain_mode:
ga_slice = ga[..., cell_slice_index]
gains[..., cell_number] = np.median(ga_slice, axis=2)
gains_std[..., cell_number] = np.std(ga_slice, axis=2)
context.map(process_cell, np.unique(cell_ids))
# bad pixels
bp = np.zeros_like(offset, dtype=np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[(offset < local_thresholds_offset_hard[0]) |
(offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
return offset, noise, gains, gains_std, bp, num_cells, local_acq_rate
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(characterize_module, inp)
```
%% Cell type:code id: tags:
``` python
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
if not fixed_gain_mode:
gain_g = OrderedDict()
gainstd_g = OrderedDict()
all_cells = []
all_acq_rate = []
for (_, module_index, gain_index), (offset, noise, gains, gains_std, bp,
thiscell, thisacq) in zip(inp, results):
all_cells.append(thiscell)
all_acq_rate.append(thisacq)
qm = module_index_to_qm(module_index)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
if not fixed_gain_mode:
gain_g[qm] = np.zeros_like(offset_g[qm])
gainstd_g[qm] = np.zeros_like(offset_g[qm])
offset_g[qm][..., gain_index] = offset
noise_g[qm][..., gain_index] = noise
badpix_g[qm][..., gain_index] = bp
if not fixed_gain_mode:
gain_g[qm][..., gain_index] = gains
gainstd_g[qm][..., gain_index] = gains_std
max_cells = np.max(all_cells)
print(f"Using {max_cells} memory cells")
acq_rate = np.max(all_acq_rate)
print(f"Using {acq_rate} MHz acquisition rate")
```
%% Cell type:code id: tags:
``` python
# Add bad pixels due to bad gain separation
if not fixed_gain_mode:
for qm in gain_g.keys():
for g in range(2):
# Bad pixels during bad gain separation.
# Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
BadPixels.GAIN_THRESHOLDING_ERROR
```
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
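Concretely, with the median gain-bit levels $\mu_{HG}$, $\mu_{MG}$ and $\mu_{LG}$ per pixel and memory cell, the two thresholds computed below are

$$T_{HG/MG} = \frac{\mu_{HG}+\mu_{MG}}{2} \qquad T_{MG/LG} = \frac{\mu_{MG}+\mu_{LG}}{2}$$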
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
thresholds_g = {}
for qm in gain_g.keys():
thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
for i in range(3):
thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[qm] = {
'Offset': offset_g[qm],
'Noise': noise_g[qm],
'BadPixelsDark': badpix_g[qm]
}
if not fixed_gain_mode:
res[qm]['ThresholdsDark'] = thresholds_g[qm]
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
report = get_report(out_folder)
```
%% Cell type:code id: tags:
``` python
# set the operating condition
# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
condition = iCalibrationDB.Conditions.Dark.AGIPD(
memory_cells=max_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
gain_mode=fixed_gain_mode,
integration_time=integration_time
)
```
%% Cell type:code id: tags:
``` python
# Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
qm_dict = OrderedDict()
all_pdus = get_pdu_from_db(
karabo_id,
karabo_da,
constant=iCalibrationDB.CalibrationConstant(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time.isoformat(),
timeout=cal_db_timeout
)
for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
qm = module_index_to_qm(module_index)
qm_dict[qm] = {
"karabo_da": module_da,
"db_module": module_pdu
}
```
%% Cell type:code id: tags:
``` python
md = None
for qm in res:
db_module = qm_dict[qm]["db_module"]
for const in res[qm]:
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
dconst.data = res[qm][const]
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
report, cal_db_interface, creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
qm_x_const = [(qm, const) for qm in res for const in res[qm]]
def retrieve_old_constant(qm, const):
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
# This should be used in case of running notebook
# by a different method other than myMDC which already
# sends CalCat info.
# TODO: Set db_module to "" by default in the first cell
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=qm_dict[qm]["karabo_da"],
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=1),
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, qm_x_const
)
old_retrieval_pool.close()
```
%% Cell type:code id: tags:
``` python
mnames=[]
for i in modules:
qm = module_index_to_qm(i)
mnames.append(qm)
display(Markdown(f'## Position of the module {qm} and its ASICs'))
show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews make it possible to identify effects that act on all memory cells, e.g. at the sensor level. They also serve as a first sanity check of expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
if high_res_badpix_3d:
cols = {
BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
}
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This excludes isolated single bad pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
gnames = ['High Gain', 'Medium Gain', 'Low Gain']
for gain in range(3):
display(Markdown(f'### {gnames[gain]} ###'))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 8000,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
# Plot only three gain threshold maps.
bp_thresh = OrderedDict()
for mod, con in badpix_g.items():
bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
bp_thresh[mod][...,:2] = con[...,:2]
bp_thresh[mod][...,2:] = con
create_constant_overview(thresholds_g, "Threshold (ADU)", max_cells, 4000, 10000, 5,
badpixels=[bp_thresh, np.nan],
gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
marker=['d','d','','','']
)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
bad_pixel_aggregate_g[m] = d.astype(bool).astype(float)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, 0, 0.10, 3)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
old_const.setdefault(qm, {})[const] = data
old_mdata.setdefault(qm, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("The following pre-existing constants are used for comparison:"))
for qm, consts in old_mdata.items():
display(Markdown(f"- {qm}"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_dict[qm]["db_module"],
"old-constants": old_mdata[qm]
},
fd,
)
```
%% Cell type:code id: tags:
``` python
table = []
gain_names = ['High', 'Medium', 'Low']
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
for qm in badpix_g.keys():
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_g[qm][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))
if old_const[qm]['BadPixelsDark'] is not None:
dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma} {thresholds_noise_hard[gain]}',
f'{thresholds_offset_sigma} {thresholds_offset_hard[gain]}',
'', f'{thresholds_gain_sigma}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]
if old_const[qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels
One pixel can be bad for several reasons at once, so the sum over all bad-pixel types can exceed the total number of bad pixels (see the sketch after this cell).
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
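A minimal sketch of why the per-type counts can exceed the total: bad-pixel flags are bit masks, so one pixel can carry several flags at once. The flag values below are made up for illustration; the real members live in `cal_tools.enums.BadPixels`.

``` python
import numpy as np

# Hypothetical flag values for illustration only; the real ones are
# defined in cal_tools.enums.BadPixels.
NOISE_OUT_OF_THRESHOLD = 1 << 0
OFFSET_OUT_OF_THRESHOLD = 1 << 1

# Three pixels: good, bad for one reason, bad for two reasons at once.
mask = np.array(
    [0,
     NOISE_OUT_OF_THRESHOLD,
     NOISE_OUT_OF_THRESHOLD | OFFSET_OUT_OF_THRESHOLD],
    dtype=np.uint32,
)

print(np.count_nonzero(mask))                            # 2 bad pixels in total
print(np.count_nonzero(mask & NOISE_OUT_OF_THRESHOLD))   # 2 flagged for noise
print(np.count_nonzero(mask & OFFSET_OUT_OF_THRESHOLD))  # 1 flagged for offset
# 2 + 1 = 3 > 2: summing the per-type counts double-counts the last pixel.
```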
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
if fixed_gain_mode:
constants = ['Offset', 'Noise']
else:
constants = ['Offset', 'Noise', 'ThresholdsDark']
constants_x_qms = list(itertools.product(constants, res.keys()))
def compute_table(const, qm):
if const == 'ThresholdsDark':
table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
else:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
compare_with_old_constant = old_const[qm][const] is not None and \
old_const[qm]['BadPixelsDark'] is not None
data = np.copy(res[qm][const])
if const == 'ThresholdsDark':
data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data[res[qm]['BadPixelsDark']>0] = np.nan
if compare_with_old_constant:
data_old = np.copy(old_const[qm][const])
if const == 'ThresholdsDark':
data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data_old[old_const[qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
def compute_row(i):
line = [n_list[i]]
for gain in range(3):
# Compare only 3 threshold gain-maps
if gain == 2 and const == 'ThresholdsDark':
continue
stat_measure = f_list[i](data[...,gain])
line.append(f"{stat_measure:6.1f}")
if compare_with_old_constant:
old_stat_measure = f_list[i](data_old[...,gain])
line.append(f"{old_stat_measure:6.1f}")
else:
line.append("-")
return line
with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
rows = pool.map(compute_row, range(len(f_list)))
table.extend(rows)
return table
with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
tables = pool.starmap(compute_table, constants_x_qms)
for (const, qm), table in zip(constants_x_qms, tables):
display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# Gain Characterization #
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to read histograms from, required
out_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to output to, required
out_folder = "" # the folder to output to, required
hist_file_template = "hists_m{:02d}_sum.h5" # the template to use to access histograms
modules = [10] # modules to correct, set to -1 for all, range allowed
raw_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # Path to raw image data used to create histograms
proc_folder = "" # Path to corrected image data used to create histograms
run = 449 # number of the run of image data used to create histograms
karabo_id = "MID_DET_AGIPD1M-1" # karabo ID of the detector
karabo_da = ['-1'] # a list of data aggregator names; default ['-1'] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to image indices
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
local_output = True # output constants locally
db_output = False # output constants to database
# Fit parameters
peak_range = [-30, 30, 35, 70, 95, 135, 145, 220] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
peak_norm_range = [0.0, -1, 0, -1, 0, -1, 0, -1] # fit limits on the peak normalisations, [a0, b0, a1, b1, ...]; -1 means unbounded
# Bad-pixel thresholds (gain evaluation error). Contribute to BadPixel bit "Gain_Evaluation_Error"
peak_lim = [-30, 30] # Limit of position of noise peak
d0_lim = [10, 80] # hard limits for distance between noise and first peak
peak_width_lim = [0.9, 1.55, 0.95, 1.65] # hard limits on the peak widths for first and second peak, in units of the noise peak. 4 parameters.
chi2_lim = [0, 3.0] # Hard limit on chi2/nDOF value
intensity_lim = 15 # Threshold on standard deviation of a histogram in ADU. Contribute to BadPixel bit "No_Entry"
gain_lim = [0.8, 1.2] # Threshold on gain in relative number. Contribute to BadPixel bit "Gain_deviation"
cell_range = [1, 3] # range of cells to be considered, [0,0] for all
pixel_range = [0, 0, 32, 32] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
max_bins = 0 # Maximum number of bins to consider, 0 for all bins
batch_size = [1, 8, 8] # batch size: [cell,x,y]
fit_range = [0, 0] # range of a histogram considered for fitting in ADU. Dynamically evaluated in case [0,0]
n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
fix_peaks = False # Fix distance between photon peaks
do_minos = False # Use minuit's MINOS feature to evaluate errors.
sigma_limit = 0. # If >0, repeat fit keeping only bins within mu +- sigma_limit*sigma
# Detector conditions
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 8.05 # photon energy in keV
```
%% Cell type:code id: tags:
``` python
import glob
import os
import traceback
import warnings
from multiprocessing import Pool
import h5py
import matplotlib.pyplot as plt
import numpy as np
import sharedmem
import XFELDetAna.xfelpyanatools as xana
from cal_tools.agipdlib import get_bias_voltage
from cal_tools.agipdutils_ff import (
any_in,
fit_n_peaks,
gaussian,
gaussian_sum,
get_mask,
get_starting_parameters,
set_par_limits,
)
from cal_tools.ana_tools import get_range, save_dict_to_hdf5
from cal_tools.enums import BadPixelsFF
from iminuit import Minuit
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
# %load_ext autotime
%matplotlib inline
warnings.filterwarnings('ignore')
```
%% Cell type:code id: tags:
``` python
peak_range = np.reshape(peak_range,(4,2))
peak_width_range = np.reshape(peak_width_range,(4,2))
peak_width_lim = np.reshape(peak_width_lim,(2,2))
peak_norm_range = [None if x == -1 else x for x in peak_norm_range]
peak_norm_range = np.reshape(peak_norm_range,(4,2))
module = modules[0]
```
%% Cell type:code id: tags:
``` python
# This is never used in this notebook and should be removed
# if bias_voltage == 0:
# # Read the bias voltage from files, if recorded.
# # If not available, make use of the historical voltage the detector is running at
# control_filename = f'{raw_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
# bias_voltage = get_bias_voltage(control_filename, karabo_id_control)
# bias_voltage = bias_voltage if bias_voltage is not None else 300
# print(f"Bias voltage: {bias_voltage}V")
```
%% Cell type:code id: tags:
``` python
def idx_gen(batch_start, batch_size):
"""
This generator iterates over pixels and memory cells from batch_start
up to batch_start + batch_size.
"""
for c_idx in range(batch_start[0], batch_start[0]+batch_size[0]):
for x_idx in range(batch_start[1], batch_start[1]+batch_size[1]):
for y_idx in range(batch_start[2], batch_start[2]+batch_size[2]):
yield(c_idx, x_idx, y_idx)
```
%% Cell type:code id: tags:
``` python
n_pixels_x = pixel_range[2]-pixel_range[0]
n_pixels_y = pixel_range[3]-pixel_range[1]
hist_data = {}
with h5py.File(f"{in_folder}/{hist_file_template.format(module)}", 'r') as hf:
hist_data['cellId'] = np.array(hf['cellId'][()])
hist_data['hRange'] = np.array(hf['hRange'][()])
hist_data['nBins'] = np.array(hf['nBins'][()])
if cell_range == [0,0]:
cell_range[1] = hist_data['cellId'].shape[0]
if max_bins == 0:
max_bins = hist_data['nBins']
hist_data['cellId'] = hist_data['cellId'][cell_range[0]:cell_range[1]]
hist_data['hist'] = np.array(hf['hist'][cell_range[0]:cell_range[1], :max_bins, :])
n_cells = cell_range[1]-cell_range[0]
hist_data['hist'] = hist_data['hist'].reshape(n_cells, max_bins, 512, 128)
hist_data['hist'] = hist_data['hist'][:,:,pixel_range[0]:pixel_range[2],pixel_range[1]:pixel_range[3]]
print(f'Data shape {hist_data["hist"].shape}')
bin_edges = np.linspace(hist_data['hRange'][0], hist_data['hRange'][1], int(hist_data['nBins']+1))
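# Bin centres computed from the bin edges, truncated to the bins kept.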
x = (bin_edges[1:] + bin_edges[:-1])[:max_bins] * 0.5
batches = []
for c_idx in range(0, n_cells, batch_size[0]):
for x_idx in range(0, n_pixels_x, batch_size[1]):
for y_idx in range(0, n_pixels_y, batch_size[2]):
batches.append([c_idx,x_idx,y_idx])
print(f'Number of batches {len(batches)}')
```
%% Cell type:code id: tags:
``` python
def fit_batch(batch_start):
current_result = {}
prev = None
for c_idx, x_idx, y_idx in idx_gen(batch_start, batch_size):
try:
y = hist_data['hist'][c_idx, :, x_idx, y_idx]
if prev is None:
prev, _ = get_starting_parameters(x, y, peak_range, n_peaks=n_peaks_fit)
if fit_range == [0, 0]:
frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
prev[f'g{n_peaks_fit-1}mean'] + prev[f'g{n_peaks_fit-1}sigma'])
else:
frange = fit_range
set_par_limits(prev, peak_range, peak_norm_range,
peak_width_range, n_peaks_fit)
minuit = fit_n_peaks(x, y, prev, frange,
do_minos=do_minos, n_peaks=n_peaks_fit,
fix_d01=fix_peaks, sigma_limit=sigma_limit,)
ndof = np.rint(frange[1]-frange[0])-len(minuit.args) ## FIXME: this line is wrong if fix_peaks is True
current_result['chi2_ndof'] = minuit.fval/ndof
res = minuit.fitarg
if fix_peaks : ## set g2 and g3 mean correctly
for i in range(2,n_peaks_fit):
d = res[f'g1mean'] - res[f'g0mean']
res[f'g{i}mean'] = res[f'g0mean'] + d*i
current_result.update(res)
current_result.update(minuit.get_fmin())
fit_result['chi2_ndof'][c_idx, x_idx, y_idx] = current_result['chi2_ndof']
for key in res.keys():
if key in fit_result:
fit_result[key][c_idx, x_idx, y_idx] = res[key]
fit_result['mask'][c_idx, x_idx, y_idx] = get_mask(current_result,
peak_lim,
d0_lim, chi2_lim,
peak_width_lim)
except Exception as e:
fit_result['mask'][c_idx, x_idx,
y_idx] = BadPixelsFF.FIT_FAILED.value
print(c_idx, x_idx, y_idx, e, traceback.format_exc())
if fit_result['mask'][c_idx, x_idx, y_idx] == 0:
prev = res
else:
prev = None
```
%% Cell type:markdown id: tags:
## Single fit ##
The left plot shows the starting parameters for the fit; the right plot shows the result of the fit. Errors are evaluated with minos.
%% Cell type:code id: tags:
``` python
hist = hist_data['hist'][1,:,1, 1]
prev, shapes = get_starting_parameters(x, hist, peak_range, n_peaks=n_peaks_fit)
if fit_range == [0, 0]:
frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
prev[f'g3mean'] + prev[f'g3sigma'])
else:
frange = fit_range
set_par_limits(prev, peak_range, peak_norm_range,
peak_width_range, n_peaks=n_peaks_fit)
minuit = fit_n_peaks(x, hist, prev, frange,
do_minos=True, n_peaks=n_peaks_fit,
fix_d01=fix_peaks,
sigma_limit=sigma_limit,
)
print (minuit.get_fmin())
minuit.print_matrix()
print(minuit.get_param_states())
```
%% Cell type:code id: tags:
``` python
res = minuit.fitarg
if fix_peaks :
for i in range(2,n_peaks_fit):
d = res[f'g1mean'] - res[f'g0mean']
res[f'g{i}mean'] = res[f'g0mean'] + d*i
err = minuit.errors
p = minuit.args
ya = np.arange(0,1e4)
y = gaussian_sum(x,n_peaks_fit, *p)
peak_colors = ['g', 'y', 'b', 'orange']
peak_hist = hist.copy()
d=[]
if sigma_limit > 0 :
sel2 = (np.abs(x - res['g0mean']) < sigma_limit*res['g0sigma']) | \
(np.abs(x - res['g1mean']) < sigma_limit*res['g1sigma']) | \
(np.abs(x - res['g2mean']) < sigma_limit*res['g2sigma']) | \
(np.abs(x - res['g3mean']) < sigma_limit*res['g3sigma'])
peak_hist[~sel2] = 0
valley_hist = hist.copy()
valley_hist[sel2] = 0
d.append({'x': x,
'y': valley_hist.astype(np.float64),
'y_err': np.sqrt(valley_hist),
'drawstyle': 'bars',
'errorstyle': 'bars',
'transparency': '95%',
'errorcoarsing': 3,
'label': 'X-ray Data'
})
htitle = f'X-ray Data, (μ±{sigma_limit:0.1f}σ)'
else :
htitle = 'X-ray Data'
d.append({'x': x,
'y': peak_hist.astype(np.float64),
'y_err': np.sqrt(peak_hist),
'drawstyle': 'bars',
'errorstyle': 'bars',
'errorcoarsing': 3,
'label': htitle,
}
)
d.append({'x': x,
'y': y,
'y2': (hist-y)/np.sqrt(hist),
'drawstyle':'line',
'drawstyle2': 'steps-mid',
'label': 'Fit'
}
)
for i in range(n_peaks_fit):
d.append({'x': x,
'y': gaussian(x, res[f'g{i}n'], res[f'g{i}mean'], res[f'g{i}sigma']),
'drawstyle':'line',
'color': peak_colors[i],
})
d.append({'x': np.full_like(ya, res[f'g{i}mean']),
'y': ya,
'drawstyle': 'line',
'linestyle': 'dashed',
'color': peak_colors[i],
'label': f'peak {i} = {res[f"g{i}mean"]:0.1f} $ \pm $ {err[f"g{i}mean"]:0.2f} ADU' })
```
%% Cell type:code id: tags:
``` python
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(16, 7)

for i, shape in enumerate(shapes):
    idx = shape[3]
    ax1.errorbar(
        x[idx], hist[idx],
        np.sqrt(hist[idx]),
        marker='+', ls='',
    )
    yg = gaussian(x[idx], *shape[:3])
    l = f'Peak {i}: {shape[1]:0.1f} $ \pm $ {shape[2]:0.2f} ADU'
    ax1.plot(x[idx], yg, label=l)

ax1.grid(True)
ax1.set_xlabel("Signal [ADU]")
ax1.set_ylabel("Counts")
ax1.legend(ncol=2)

_ = xana.simplePlot(
    d,
    use_axis=ax2,
    x_label='Signal [ADU]',
    y_label='Counts',
    secondpanel=True, y_log=False,
    x_range=(frange[0], frange[1]),
    y_range=(1., np.max(hist)*1.6),
    legend='top-left-frame-ncol2',
)
plt.show()
```
%% Cell type:markdown id: tags:
## All fits ##
%% Cell type:code id: tags:
``` python
# Allocate memory for fit results
fit_result = {}
keys = list(minuit.fitarg.keys())
keys = [x for x in keys if 'limit_' not in x and 'fix_' not in x]
keys += ['chi2_ndof', 'mask', 'gain']
for key in keys:
dtype = 'f4'
if key == 'mask':
dtype = 'i4'
fit_result[key] = sharedmem.empty([n_cells, n_pixels_x, n_pixels_y], dtype=dtype)
```
%% Cell type:code id: tags:
``` python
# Perform fitting
with Pool() as pool:
const_out = pool.map(fit_batch, batches)
```
%% Cell type:code id: tags:
``` python
# Evaluate bad pixels
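# Relative gain [ADU/keV]: distance between the noise peak (g0) and the
# first photon peak (g1), divided by the photon energy.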
fit_result['gain'] = (fit_result['g1mean'] - fit_result['g0mean'])/photon_energy
# Calculate histogram width and evaluate cut
h_sums = np.sum(hist_data['hist'], axis=1)
hist_norm = hist_data['hist'] / h_sums[:, None, :, :]
hist_mean = np.sum(hist_norm[:, :max_bins, ...] *
x[None, :, None, None], axis=1)
hist_sqr = (x[None, :, None, None] - hist_mean[:, None, ...])**2
hist_std = np.sqrt(np.sum(hist_norm[:, :max_bins, ...] * hist_sqr, axis=1))
fit_result['mask'][hist_std<intensity_lim] |= BadPixelsFF.NO_ENTRY.value
# Bad pixel on gain deviation
gains = np.copy(fit_result['gain'])
gains[fit_result['mask']>0] = np.nan
gain_mean = np.nanmean(gains, axis=(1,2))
fit_result['mask'][fit_result['gain'] > gain_mean[:,None,None]*gain_lim[1] ] |= BadPixelsFF.GAIN_DEVIATION.value
fit_result['mask'][fit_result['gain'] < gain_mean[:,None,None]*gain_lim[0] ] |= BadPixelsFF.GAIN_DEVIATION.value
```
%% Cell type:code id: tags:
``` python
# Save fit results
os.makedirs(out_folder, exist_ok=True)
out_name = f'{out_folder}/fits_m{module:02d}.h5'
print(f'Save to file: {out_name}')
save_dict_to_hdf5({'data': fit_result}, out_name)
```
%% Cell type:markdown id: tags:
## Summary across cells ##
%% Cell type:code id: tags:
``` python
labels = [
    "Noise peak [ADU]",
    "First photon peak [ADU]",
    f"gain [ADU/keV] $\gamma$={photon_energy} [keV]",
    "$\chi^2$/nDOF",
    "Fraction of bad pixels",
]

for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']):
    fig = plt.figure(figsize=(20,5))
    ax = fig.add_subplot(121)
    data = fit_result[key]
    if key == 'mask':
        data = data > 0
        vmin, vmax = [0, 1]
    else:
        vmin, vmax = get_range(data, 5)
    _ = heatmapPlot(
        np.mean(data, axis=0).T,
        add_panels=False, cmap='viridis', use_axis=ax,
        vmin=vmin, vmax=vmax, lut_label=labels[i]
    )

    if key != 'mask':
        vmin, vmax = get_range(data, 7)

    ax = fig.add_subplot(122)
    _ = xana.histPlot(
        ax, data.flatten(),
        bins=45, range=[vmin, vmax],
        log=True, color='red', histtype='stepfilled'
    )
    ax.set_xlabel(labels[i])
    ax.set_ylabel("Counts")
```
%% Cell type:markdown id: tags:
## Histograms of fit parameters ##
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
a = ax.hist(hist_std.flatten(), bins=100, range=(0,100) )
ax.plot([intensity_lim, intensity_lim], [0, np.nanmax(a[0])], linewidth=1.5, color='red' )
ax.set_xlabel('Histogram width [ADU]', fontsize=14)
ax.set_ylabel('Number of histograms', fontsize=14)
ax.set_title(f'{hist_std[hist_std<intensity_lim].shape[0]} histograms below threshold in {intensity_lim} ADU',
             fontsize=14, fontweight='bold')
ax.grid()
ax.set_yscale('log')
```
%% Cell type:code id: tags:
``` python
def plot_par_distr(par):
    fig = plt.figure(figsize=(16, 5))
    sel = fit_result['mask'] == 0

    for i in range(n_peaks_fit):
        data = fit_result[f"g{i}{par}"]
        plt_range = (-1, 50)
        if par == 'mean':
            plt_range = [peak_range[i][0], peak_range[i][1]]
        num_bins = int(plt_range[1] - plt_range[0])
        ax = fig.add_subplot(1, n_peaks_fit, i+1)
        _ = xana.histPlot(ax, data.flatten(),
                          bins=num_bins, range=plt_range,
                          log=True, color='red',
                          label='all fits',)
        a = ax.hist(data[sel].flatten(),
                    bins=num_bins, range=plt_range,
                    log=True, color='g',
                    label='good fits only',
                   )
        ax.set_xlabel(f"g{i} {par} [ADU]")
        ax.legend()
plot_par_distr('mean')
plot_par_distr('sigma')
```
%% Cell type:code id: tags:
``` python
sel = fit_result['mask'] == 0

dsets = {'d01 [ADU]': fit_result[f"g1mean"]-fit_result[f"g0mean"],
         'gain [ADU/keV]': fit_result[f"gain"],
         'gain relative to module mean': fit_result[f"gain"]/np.mean(gain_mean),
        }

fig = plt.figure(figsize=(16,5))
for i, (par, data) in enumerate(dsets.items()):
    ax = fig.add_subplot(1, 3, i+1)
    plt_range = get_range(data, 10)
    num_bins = 100
    _ = xana.histPlot(ax, data.flatten(),
                      bins=num_bins, range=plt_range,
                      log=True, color='red',
                      label='all fits',)
    a = ax.hist(data[sel].flatten(),
                bins=num_bins, range=plt_range,
                log=True, color='g',
                label='good fits only',
               )
    ax.set_xlabel(f"{par}")
    ax.legend()

    if 'd01' in par:
        ax.axvline(d0_lim[0])
        ax.axvline(d0_lim[1])
    if 'rel' in par:
        ax.axvline(gain_lim[0])
        ax.axvline(gain_lim[1])
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
Mean and median values are calculated across all pixels for each memory cell.
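Here the median absolute deviation, $\mathrm{MAD} = \mathrm{median}(|x - \mathrm{median}(x)|)$, is used as the robust counterpart of the standard deviation.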
%% Cell type:code id: tags:
``` python
def plot_error_band(key, x, ax):
    cdata = np.copy(fit_result[key])
    cdata[fit_result['mask']>0] = np.nan

    mean = np.nanmean(cdata, axis=(1,2))
    median = np.nanmedian(cdata, axis=(1,2))
    std = np.nanstd(cdata, axis=(1,2))
    mad = np.nanmedian(np.abs(cdata - median[:,None,None]), axis=(1,2))

    ax.plot(x, mean, 'k', color='#3F7F4C', label=" mean value ")
    ax.plot(x, median, 'o', color='red', label=" median value ")

    ax.fill_between(x, mean-std, mean+std,
                    alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
                    linewidth=1, linestyle='dashdot', antialiased=True,
                    label=" mean value $ \pm $ std ")

    ax.fill_between(x, median-mad, median+mad,
                    alpha=0.3, edgecolor='red', facecolor='red',
                    linewidth=1, linestyle='dashdot', antialiased=True,
                    label=" median value $ \pm $ mad ")

    if f'error_{key}' in fit_result:
        cerr = np.copy(fit_result[f'error_{key}'])
        cerr[fit_result['mask']>0] = np.nan
        meanerr = np.nanmean(cerr, axis=(1,2))
        ax.fill_between(x, mean-meanerr, mean+meanerr,
                        alpha=0.6, edgecolor='#089FFF', facecolor='#089FFF',
                        linewidth=1, linestyle='dashdot', antialiased=True,
                        label=" mean fit error ")

x = np.linspace(*cell_range, n_cells)

for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof']):
    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(111)
    plot_error_band(key, x, ax)

    ax.set_xlabel('Memory Cell ID', fontsize=14)
    ax.set_ylabel(labels[i], fontsize=14)
    ax.grid()
    ax.legend()
```
%% Cell type:markdown id: tags:
## Cut flow ##
Each bar below shows the fraction of fits surviving the bad-pixel cuts, applied either cumulatively ('Cut flow') or individually ('Only this cut').
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
n_bars = 8
x = np.arange(n_bars)
width = 0.3
msk = fit_result['mask']
n_fits = np.prod(msk.shape)
y = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value| BadPixelsFF.GAIN_DEVIATION.value)
]
y2 = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.GAIN_DEVIATION.value)
]
y = (1 - np.sum(y, axis=(1,2,3))/n_fits)*100
y2 = (1 - np.sum(y2, axis=(1,2,3))/n_fits)*100
labels = ['Fit fails',
'Accurate covar',
'Chi2/nDOF',
'Gain',
'Noise peak',
'Peak width',
'No Entry',
'Gain deviation']
ax.bar(x, y2, width, label='Only this cut')
ax.bar(x, y, width, label='Cut flow')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.set_ylim(y[5]-0.5, 100)
ax.grid(True)
ax.legend()
plt.show()
```
......
%% Cell type:markdown id: tags:
# Characterize AGIPD Pulse Capacitor Data #
Author: S. Hauf, Version 1.0
The following code characterizes AGIPD gain via data taken with the pulse capacitor source (PCS). The PCS allows scanning through the high and medium gains of AGIPD by successively increasing the number of charge pulses from an on-ASIC capacitor, thus increasing the charge a pixel sees in a given integration time.
Because the induced charge does not originate from X-rays on the sensor, the gains evaluated here will later need to be rescaled with gains deduced from X-ray data.
PCS data is organized into multiple runs, as the on-ASIC current source cannot supply all pixels of a given module with charge at the same time. Hence, only certain pixel rows will have seen charge for a given image. These rows first need to be combined into single module images again.
We then use a K-means clustering algorithm to identify components in the resulting per-pixel data series, corresponding to three general regions:
* a high gain slope
* a transition region, where gain switching occurs
* a medium gain slope.
The same regions are present in the gain-bit data and are used to deduce the switching threshold.
The resulting slopes are then fitted with a linear function (high gain) and a combination of a linear and an exponential decay function (medium gain, the so-called hook) to determine the relative gains of the pixels with respect to the module. Additionally, we deduce bad pixel masks from the data.
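As a rough sketch of the clustering idea (a minimal illustration, not the notebook's implementation): each point of a pixel's scan can be clustered by its signal value and local slope, which separates the two linear branches from the switching region. `split_regions`, the toy scan and all numbers below are hypothetical, and scikit-learn is assumed to be available. The hook fit mentioned above has the general form $f(x) = m\,x + b + a\,e^{-(x-o)/c}$ (parameter names illustrative).

``` python
import numpy as np
from sklearn.cluster import KMeans

def split_regions(signal: np.ndarray) -> np.ndarray:
    """Cluster the points of a single pixel's PC scan into three components.

    The clusters correspond to the high-gain slope, the switching region
    and the medium-gain slope, in no guaranteed label order.
    """
    slope = np.gradient(signal.astype(np.float64))
    features = np.stack([signal, slope], axis=1)
    # Normalise both features so neither dominates the Euclidean distance.
    features = (features - features.mean(axis=0)) / features.std(axis=0)
    return KMeans(n_clusters=3, n_init=10).fit_predict(features)

# Toy scan: a steep high-gain ramp followed by a much shallower medium-gain ramp.
x = np.arange(300, dtype=np.float64)
scan = np.where(x < 120, 40 * x, 40 * 120 + 2 * (x - 120))
region_labels = split_regions(scan + np.random.normal(0, 5, x.size))
```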
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = '/gpfs/exfel/exp/SPB/202130/p900188/raw/' # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/jsztuk/test/pc" # path to output to, required
runs = [92, 93, 94, 95, 96, 97, 98, 99] # runs to use, required, range allowed
n_sequences = 5 # number of sequence files, starting from 0, to evaluate
modules = [-1] # modules to work on, required, range allowed
karabo_da = ["all"]
karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control infromation
karabo_id_control = "SPB_IRU_AGIPD1M1"
karabo_id = "SPB_DET_AGIPD1M-1"
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
use_dir_creation_date = True
delta_time = 0 # offset to the creation time in hours (e.g. to force the use of different dark constants)
cal_db_interface = "tcp://max-exfl016:8019" # the database interface to use
local_output = True # output constants locally
db_output = False # output constants to database
bias_voltage = 300 # detector bias voltage
mem_cells = 0. # number of memory cells used, use 0 to auto-derive
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, 0 or 1; default 0.1 to try to auto-determine
integration_time = -1 # integration time, negative values for auto-detection.
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
fit_hook = True # fit a hook function to medium gain slope
rawversion = 2 # RAW file format version
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
```
%% Cell type:code id: tags:
``` python
# imports, usually no need to change anything here
import os
import warnings
from datetime import datetime, timedelta
from functools import partial
warnings.filterwarnings('ignore')
import dateutil.parser
import h5py
import matplotlib
import numpy as np
from ipyparallel import Client
import matplotlib.pyplot as plt
%matplotlib inline
import XFELDetAna.xfelpyanatools as xana
from cal_tools.agipdlib import (
get_acq_rate, get_gain_setting, get_integration_time, get_num_cells
)
from cal_tools.enums import BadPixels
from cal_tools.plotting import plot_badpix_3d, show_overview
from cal_tools.tools import (
gain_map_files,
get_constant_from_db_and_time,
get_dir_creation_date,
get_notebook_name,
get_pdu_from_db,
get_report,
module_index_to_qm,
parse_runs,
run_prop_seq_from_path,
send_to_db,
)
from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
view = Client(profile=cluster_profile)[:]
view.use_dill()
IL_MODE = interlaced
maxcells = mem_cells if not interlaced else mem_cells*2
cells = mem_cells
path_temp = in_folder+"/r{:04d}/"
image_name_temp = 'RAW-R{:04d}-AGIPD{:02d}-S{:05d}.h5'
seqs = n_sequences
print("Parameters are:")
print("Memory cells: {}/{}".format(cells, maxcells))
print("Runs: {}".format(runs))
print("Modules: {}".format(modules))
print("Sequences: {}".format(seqs))
print("Interlaced mode: {}".format(IL_MODE))
run, prop, seq = run_prop_seq_from_path(in_folder)
instrument = karabo_id.split("_")[0]
if instrument == "HED":
nmods = 8
else:
nmods = 16
print(f"Detector in use is {karabo_id}")
if karabo_da == ["all"]:
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:markdown id: tags:
## Read in data and merge ##
The number of bursts in each sequence file is determined from the sequence files of the first module.
%% Cell type:code id: tags:
``` python
run = runs[0]
bursts_per_file = []
channel = 0
for seq in range(seqs):
fname = os.path.join(path_temp.format(run),
image_name_temp.format(run, channel, seq))
print('Reading ',fname)
if acq_rate == 0.:
acq_rate = get_acq_rate((fname, karabo_id, channel))
print("Acquisition rate set from file: {} MHz".format(acq_rate))
if mem_cells == 0:
cells = get_num_cells(fname, karabo_id, channel)
maxcells = cells
mem_cells = cells # avoid setting twice
print("Memory cells set from file: {}".format(cells))
f = h5py.File(fname, 'r', driver='core')
if rawversion == 2:
count = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(karabo_id, channel)])
bursts_per_file.append(np.count_nonzero(count))
else:
status = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(karabo_id, channel)])
bursts_per_file.append(np.count_nonzero(status != 0))
f.close()
bursts_per_file = np.array(bursts_per_file)
print("Bursts per sequence file are: {}".format(bursts_per_file))
# Define creation time
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
creation_time = creation_time + timedelta(hours=delta_time)
print(f"Using {creation_time} as creation time of constant.")
```
%% Cell type:code id: tags:
``` python
control_fname = f'{in_folder}/r{runs[0]:04d}/RAW-R{runs[0]:04d}-{karabo_da_control}-S00000.h5'
if "{" in h5path_ctrl:
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
print(f"Gain setting: {gain_setting}")
if integration_time < 0:
integration_time = get_integration_time(control_fname, h5path_ctrl)
print(f"Integration time: {integration_time}")
```
%% Cell type:code id: tags:
``` python
def read_and_merge_module_data(cells, path_temp, image_name_temp,
runs, seqs, il_mode, rawversion, instrument, channel):
import os
import h5py
import numpy as np
def cal_bursts_per_file(run, dseq=0):
bursts_per_file = []
channel = 0
for seq in range(dseq, seqs+dseq):
#print(run, channel, seq)
fname = os.path.join(path_temp.format(run),
image_name_temp.format(run, channel, seq))
#print('Reading ',fname)
with h5py.File(fname, 'r') as f:
if rawversion == 2:
count = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)][()])
count = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(karabo_id, channel)][()])
bursts_per_file.append(np.count_nonzero(count))
del count
else:
status = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)][()])
status = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(karabo_id, channel)][()])
bursts_per_file.append(np.count_nonzero(status != 0))
del status
if bursts_per_file[0] == 0:
return cal_bursts_per_file(run, dseq=dseq+1) # late start of daq
return np.array(bursts_per_file), dseq
#bursts_per_file = np.hstack([0, bursts_per_file])
bursts_total = np.max([np.sum(cal_bursts_per_file(run)[0]) for run in runs])
cfac = 2 if il_mode else 1
def read_raw_data_file(fname, channel, cells = cells, cells_tot = cells, bursts = 250,
skip_first_burst = True, first_burst_length = cells):
data = None
cellID_all = None
with h5py.File(fname, 'r') as f:
#print('Reading ',fname)
image_path_temp = 'INSTRUMENT/{}/DET/{}CH0:xtdf/image/data'.format(karabo_id, channel)
cellID_path_temp = 'INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId'.format(karabo_id, channel)
if rawversion == 2:
count = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
count = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(karabo_id, channel)])
first = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(karabo_id, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
status = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(karabo_id, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/last".format(karabo_id, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
#print(first_index, last_index)
data = f[image_path_temp][first_index:last_index,...][()]
cellID_all = np.squeeze(f[cellID_path_temp][first_index:last_index,...][()])
data = data[cellID_all<cells, ...]
#bursts = int(data.shape[0]/adcells)
#print('Bursts: ', bursts)
analog = np.zeros((bursts - skip_first_burst, cells//cfac, 128, 512))
digital = np.zeros((bursts - skip_first_burst, cells//cfac, 128, 512))
cellID = np.zeros(( (bursts - skip_first_burst) * cells))
offset = skip_first_burst * first_burst_length
for b in range(min(bursts, data.shape[0]//cells-1) - skip_first_burst-1):
try:
analog[b, : cells//cfac, ...] = np.swapaxes(data[b * cells_tot + offset : b * cells_tot + cells + offset : cfac,
0, ...], -1, -2)
digital[b, : cells//cfac, ...] = np.swapaxes(data[b * cells_tot + cfac - 1 + skip_first_burst * first_burst_length :
b * cells_tot + cells + cfac - 1 + offset :cfac, cfac%2, ...], -1, -2)
cellID[ b * cells : (b + 1) * cells] = cellID_all[b * cells_tot + offset : b * cells_tot + cells + offset].flatten()
            except Exception:
                # re-raise with context about which burst failed to slice
                raise AttributeError(
                    "Could not slice burst {} (offset {}, {} cells)".format(b, offset, cells))
return {'analog': analog, 'digital': digital, 'cellID': cellID}
pc_data = {'analog': np.zeros((bursts_total, cells//cfac, 128, 512)),
'digital': np.zeros((bursts_total, cells//cfac, 128, 512)),
'cellID': np.zeros(((bursts_total) * cells))
}
pc_data_merged = {'analog': np.zeros((bursts_total, cells//cfac, 128, 512)),
'digital': np.zeros((bursts_total, cells//cfac, 128, 512)),
'cellID': np.zeros(((bursts_total) * cells))
}
for run_idx, run in enumerate(runs):
bursts_per_file, dseq = cal_bursts_per_file(run)
print("Run {}: bursts per file: {} -> {} total".format(run, bursts_per_file, np.sum(bursts_per_file)))
#Read files in
last_burst = 0
for seq in range(dseq, seqs+dseq):
fname = os.path.join(path_temp.format(run),
image_name_temp.format(run, channel, seq))
if seq-dseq == 0:
skip_first_burst = True
else:
skip_first_burst = False
bursts = bursts_per_file[seq-dseq]
try:
aa = read_raw_data_file(fname, channel, bursts = bursts,
skip_first_burst = skip_first_burst,
first_burst_length = cells)
pc_data['analog'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = aa['analog']
pc_data['digital'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = aa['digital']
pc_data['cellID'][last_burst * cells : (last_burst+bursts_per_file[seq-dseq]-skip_first_burst) * cells, ...] = aa['cellID']
except Exception as e:
print(e)
pc_data['analog'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = 0
pc_data['digital'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = 0
pc_data['cellID'][last_burst * cells : (last_burst+bursts_per_file[seq-dseq]-skip_first_burst) * cells, ...] = 0
finally:
last_burst += bursts_per_file[seq-dseq]-skip_first_burst
# Copy injected rows
for row_i in range(8):
try:
pc_data_merged['analog'][:,:,row_i * 8 + (7 - run_idx),:] = pc_data['analog'][:bursts_total,:cells//cfac,row_i * 8 + (7 - run_idx),:]
pc_data_merged['analog'][:,:,64 + row_i * 8 + run_idx ,:] = pc_data['analog'][:bursts_total,:cells//cfac, 64 + row_i * 8 + run_idx,:]
pc_data_merged['digital'][:,:,row_i * 8 + (7 - run_idx),:] = pc_data['digital'][:bursts_total,:cells//cfac,row_i * 8 + (7 - run_idx),:]
pc_data_merged['digital'][:,:,64 + row_i * 8 + run_idx ,:] = pc_data['digital'][:bursts_total,:cells//cfac, 64 + row_i * 8 + run_idx,:]
except Exception as e:
print(e)
#Check cellIDs
#Copy cellIDs of first run
if run_idx == 0:
pc_data_merged['cellID'][...] = pc_data['cellID'][...]
#Check cellIDs of all the other runs
#else:
# print('cellID difference:{}'.format(np.sum(pc_data_merged['cellID']-pc_data['cellID'])))
return pc_data_merged['analog'], pc_data_merged['digital'], pc_data_merged['cellID']
start = datetime.now()
p = partial(read_and_merge_module_data, maxcells, path_temp, image_name_temp,
runs, seqs, IL_MODE, rawversion, instrument)
# chunk this a bit, so that we don't overuse available memory
res = list(map(p, modules))
```
%% Cell type:markdown id: tags:
## Slope Clustering and Fitting ##
The following two cells contain the actual algorithm logic as well as a preview of a single pixel and a few memory cells, visualizing the data and the concepts.
We start out by calculating an estimate of the slope in the proximity of each data value. This is done by calculating the slopes between a given value and each of its 15 preceding neighbours and averaging the result. Values are then clustered by these slopes into three regions via a K-means algorithm.
* for the first region a linear function is fitted to the data, determining the gain slope and offset for the high gain mode:
$$y = mx + b$$
* for the second and third region a composite function of the form
$$y = A e^{-(x-O)/C} + mx + b$$
is fitted, covering both the transition region and the medium gain slope.
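As a toy illustration of this local-slope estimate (not part of the notebook; the two-slope signal and all values below are made up for demonstration):
%% Cell type:code id: tags:
``` python
import numpy as np

def local_slopes(x, y, scan_range=15):
    # slope of each point against its scan_range predecessors, averaged;
    # np.roll wraps around, so the first scan_range values are unreliable
    ms = np.zeros((x.shape[0], scan_range))
    for i in range(scan_range):
        ms[:, i] = (y - np.roll(y, i + 1)) / (x - np.roll(x, i + 1))
    return np.mean(ms, axis=1)

x = np.arange(100, dtype=float)
y = np.where(x < 60, 40.0 * x, 2400.0 + 0.5 * (x - 60))  # slope kink at x=60
m = local_slopes(x, y)  # ~40 below the kink, ~0.5 above it -> separable by K-means
```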
%% Cell type:code id: tags:
``` python
from iminuit import Minuit
from iminuit.util import describe, make_func_code
from sklearn.cluster import KMeans
def calc_m_cluster(x, y):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
k = KMeans(n_clusters=3, n_jobs=-2)
k.fit(m.reshape(-1, 1))
ms = []
for lbl in np.unique(k.labels_):
xl = x[k.labels_ == lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[k.labels_ == lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, k.labels_, k.cluster_centers_
def rolling_window(a, window):
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
strides = a.strides + (a.strides[-1],)
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
mm = np.zeros_like(m)
mm[...] = np.nan
m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
reg1 = m > r1
reg2 = m < r2
reg3 = (m > r2) & (m < r3)
reg4 = ~(reg1 | reg2 | reg3)
labels = [reg1, reg2, reg3, reg4]
regions = np.zeros_like(x, np.uint8)
for r, lbl in enumerate(labels):
regions[lbl] = r
scan_range = 30
mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
regions[...] = -1
regions[scan_range//2:-scan_range//2+1] = mregions
labels = [regions == 0, regions == 1, regions == 2, regions == 3]
idx = np.arange(x.size)
maxlbl = x.size-1
for i in range(0, len(labels)-1):
nidx = labels[i+1]
if np.any(nidx):
maxlbl = np.max(idx[nidx])
cidx = idx > maxlbl
if np.any(cidx):
labels[i][cidx] = False
ms = []
for lbl in labels:
xl = x[lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, labels, None
def fit_data(fun, x, y, yerr, par_ests):
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 0
f_sig = describe(fun)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x[y != 0]
self.y = y[y != 0]
self.err = err[y != 0]
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
wrapped = _Chi2Functor(fun, x, y, yerr)
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
return m.values
def lin_fun(x, m, b):
return m*x+b
def hook_fun(x, a, c, o, m, b):
return a*np.exp(-(x-o)/c)+m*x+b
```
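%% Cell type:markdown id: tags:
A hypothetical usage sketch of `fit_data` with the linear model above (this assumes the pre-2.0 iminuit keyword interface the notebook relies on; the synthetic data is made up):
%% Cell type:code id: tags:
``` python
xs = np.arange(1, 51, dtype=float)
ys = 3.0 * xs + 10.0 + np.random.normal(0, 1.0, xs.size)
# fit_data filters out y == 0 internally and returns the Minuit parameter values
vals = fit_data(lin_fun, xs, ys, np.ones_like(xs), {'m': 1.0, 'b': 0.0})
print(vals['m'], vals['b'])
```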
%% Cell type:code id: tags:
``` python
from cal_tools.tools import get_constant_from_db_and_time
offsets = {}
noises = {}
thresholds = {}
for k_da, mod in zip(karabo_da, modules):
offset, when = get_constant_from_db_and_time(karabo_id, k_da,
Constants.AGIPD.Offset(),
Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
integration_time=integration_time),
np.zeros((128, 512, mem_cells, 3)),
cal_db_interface,
creation_time=creation_time)
offsets[mod] = np.array(offset.data)
noise, when = get_constant_from_db_and_time(karabo_id, k_da,
Constants.AGIPD.Noise(),
Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
integration_time=integration_time),
np.zeros((128, 512, mem_cells, 3)),
cal_db_interface, creation_time=creation_time)
noises[mod] = np.array(noise.data)
threshold, when = get_constant_from_db_and_time(karabo_id, k_da,
Constants.AGIPD.ThresholdsDark(),
Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
integration_time=integration_time),
np.zeros((128, 512, mem_cells, 3)),
cal_db_interface, creation_time=creation_time)
thresholds[mod] = np.array(threshold.data)
```
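%% Cell type:markdown id: tags:
A quick sanity check on the retrieved dark constants; the expected shape follows the zero-filled fallback used above, and the printout is illustrative only:
%% Cell type:code id: tags:
``` python
for mod in modules:
    assert offsets[mod].shape == (128, 512, mem_cells, 3)  # (slow, fast, cell, gain)
    print(f"AGIPD{mod:02d}: median HG offset of cell 0: "
          f"{np.median(offsets[mod][..., 0, 0]):.1f} ADU")
```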
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range1 = [(0,16), (0,64)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38, 64, 128]#, 200, 249]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
from mpl_toolkits.axes_grid1 import ImageGrid
for mod, r in zip(modules, res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
H = [0, 0, 0, 0]
ex, ey = None, None
offset = offsets[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
if x.shape[0] == 0:
continue
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
ymin = y.min()
for i, lbl in enumerate(labels):
if np.any(lbl):
#ym = y[lbl]-y[lbl].min()
if i == 0:
gain = 0
else:
gain = 1
ym = y[lbl] - offset[pix[0], pix[1], cell, gain]
#if i != 0:
# ym += y[labels[0]].max()-y[labels[0]].min()
h, ex, ey = np.histogram2d(x[lbl], ym, range=((0, 600), (-500, 6000)), bins=(300, 650))
H[i] += h
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
for i in range(3):
H[i][H[i]==0] = np.nan
ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)")
ax.set_xlabel("PC scan point (#)")
```
%% Cell type:markdown id: tags:
### Examples from Pixel Subset ###
The following is a visualization of the clustering and fitting for a subset of pixels. If the data significantly mismatches expectations, the clustering and fitting algorithms should fail for this subset:
* the first plot shows the clustering results for pixels which were successfully evaluated
* the second plot shows the clustering results for pixels which failed to evaluate
* the third plot shows the fits and fit residuals for the pixel clusters shown in the first plot
Non-smooth behaviour is an indication that you are erroneously processing data as interleaved when it is not, or vice versa, or that the wrong number of memory cells is set.
We do this twice, for two different detector regions.
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range1 = [(250,254), (60,64)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
for mod, r in zip(modules, res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
offset = offsets[mod]
noise = noises[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
for i, lbl in enumerate(labels):
if i == 0:
gain = 0
else:
gain = 1
d.append({'x': x[lbl],
'y': y[lbl] - offset[pix[0], pix[1], cell, gain],
'marker': markers[i],
'color': colors[i],
'linewidth': 0
})
#if ms[i] < 0: # slope separating two regions
# bound = np.min(x[lbl])
# bound_m = ms[i]
if labels[1].any():
bound = np.min(x[labels[1]])
bound_m = ms[1]
if bound is None or bound < 20 and False:
ya = ana[:,cell, pix[0], pix[1]][vidx]
msa, labels, centers = calc_m_cluster2(x, ya, 25, -10, 25)
if np.count_nonzero(labels[0]) > 0:
bound = np.min(x[labels[0]])
bound_m = ms[3]
else:
avg_g = np.nanmean(ya)
bound = np.max(x[y < avg_g])
bound_m = ms[3]
#print(bound)
# fit linear slope
if not np.isnan(bound_m):
xl = x[(x<bound)]
yl = y[(x<bound)] - offset[pix[0], pix[1], cell, 0]
parms = {'m': bound_m, 'b': np.min(yl)}
errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
fitted = fit_data(lin_fun, xl, yl, errors , parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.max(np.abs((yl-yf)/yl))
d3.append({'x': xl,
'y': yf,
'color': 'k',
'linewidth': 1,
'y2': (yf-yl)/errors
})
xl = x[(x<bound-20)]
yl = y[(x<bound-20)] - offset[pix[0], pix[1], cell, 0]
if yl.shape[0] != 0:
parms = {'m': bound_m, 'b': np.min(yl)}
errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
fitted = fit_data(lin_fun, xl, yl, errors , parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.max(np.abs((yl-yf)/yl))
d3.append({'x': xl,
'y': yf,
'color': 'k',
'linewidth': 1,
'y2': (yf-yl)/errors
})
# fit hook slope
if fit_hook:
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xh = x[idx]
yh = y[idx] - offset[pix[0], pix[1], cell, 1]
if len(yh[yh > 0]) == 0:
break
parms = {'m': bound_m/10 if bound_m/10>0.3 else 0.5, 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5, 'o': bound-1}
parms["limit_m"] = [0.3, 1.0]
parms["limit_m"] = [0.3, 2.0]
parms["limit_c"] = [1., 1000]
errors = np.ones(xh.shape)*noise[pix[0], pix[1], cell, 1]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
max_devh = np.max(np.abs((yh-yf)/yh))
#print(fitted)
d3.append({'x': xh,
'y': yf,
'color': 'red',
'linewidth': 1,
'y2': (yf-yh)/errors
})
x = np.arange(ana.shape[0])
y = ana[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
#ms, labels, centers = calc_m_cluster2(x, y, 25, -10, 25)
if len(y[labels[0]]) != 0 and len(y[labels[2]]) != 0:
threshold = (np.mean(y[labels[0]])+np.mean(y[labels[2]]))/2
for i, lbl in enumerate(labels):
d2.append({'x': x[lbl],
'y': y[lbl],
'marker': markers[i],
'color': colors[i],
'lw': None
})
d2.append({'x': np.array([x[0], x[-1]]),
'y': np.ones(2)*threshold,
'color': 'k',
'lw': 1
})
#threshold = (np.min(y[x<bound]) + np.max(y[x>=bound]))/2
fig = xana.simplePlot(d, y_label="PC pixel signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot.png".format(out_folder, mod))
fig = xana.simplePlot(d2, y_label="PC gain signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot_gain.png".format(out_folder, mod))
fig = xana.simplePlot(d3, secondpanel=True, y_label="PC signal (ADU)", figsize='2col', aspect=2,
x_label="step #", y2_label="Residuals ($\sigma$)", y2_range=(-5,5))
fig.savefig("{}/module_{}_pixel_plot_fits.png".format(out_folder, mod))
```
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range2 = [(96,128), (32,64)]
for i in range(*tpix_range2[0]):
for j in range(*tpix_range2[1]):
test_pixels.append((j,i))
for mod, r in zip(modules, res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
offset = offsets[mod]
noise = noises[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
for i, lbl in enumerate(labels):
if i == 0:
gain = 0
else:
gain = 1
d.append({'x': x[lbl],
'y': y[lbl] - offset[pix[0], pix[1], cell, gain],
'marker': markers[i],
'color': colors[i],
'linewidth': 0
})
#if ms[i] < 0: # slope separating two regions
# bound = np.min(x[lbl])
# bound_m = ms[i]
if len(x[labels[1]]):
bound = np.min(x[labels[1]])
bound_m = ms[1]
# fit linear slope
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xl = x[(x<bound)]
yl = y[(x<bound)] - offset[pix[0], pix[1], cell, 0]
xl = x[(x<bound-20)]
yl = y[(x<bound-20)] - offset[pix[0], pix[1], cell, 0]
errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
if yl.shape[0] != 0:
parms = {'m': bound_m, 'b': np.min(yl)}
fitted = fit_data(lin_fun, xl, yl, errors, parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.max(np.abs((yl-yf)/yl))
xtt = np.arange(ana.shape[0])
ytt = ana[:,cell, pix[0], pix[1]]
vidx = (ytt > 1000) & np.isfinite(ytt)
xtt = xtt[vidx]
ytt = ytt[vidx]
#ms, labels, centers = calc_m_cluster2(x, y, 25, -10, 25)
if len(y[labels[0]]) != 0 and len(y[labels[2]]) != 0:
threshold = (np.mean(ytt[labels[0]])+np.mean(ytt[labels[2]]))/2
if threshold > 10000 or threshold < 4000:
d3.append({
'x': xl,
'y': yf,
'color': 'k',
'linewidth': 1,
'y2': (yf-yl)/errors
})
if bound is None:
ya = ana[:,cell, pix[0], pix[1]][vidx]
msa, labels, centers = calc_m_cluster2(x, ya, 25, -10, 25)
if np.count_nonzero(labels[0]) > 0:
bound = np.min(x[labels[0]])
bound_m = ms[3]
else:
avg_g = np.nanmean(ya)
bound = np.max(x[y < avg_g])
bound_m = ms[3]
# fit hook slope
try:
if fit_hook and len(yh[yh > 0]) !=0:
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xh = x[idx]
yh = y[idx] - offset[pix[0], pix[1], cell, 1]
errors = np.ones(xh.shape)*noise[pix[0], pix[1], cell, 1]
parms = {
'm': np.abs(bound_m/10),
'b': np.min(yh[yh > 0]),
'a': np.max(yh),
'c': 5.,
'o': bound-1
}
parms["limit_m"] = [0.3, 1.0]
parms["limit_m"] = [0.3, 2.0]
parms["limit_c"] = [1., 1000]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
max_devh = np.max(np.abs((yh-yf)/yh))
#print(fitted)
if threshold > 10000 or threshold < 4000 or fitted['m'] < 0.2:
d3.append({
'x': xh,
'y': yf,
'color': 'red',
'linewidth': 1,
'y2': (yf-yh)/errors
})
except Exception as e:
if "zero-size array" in str(e):
pass
else:
print(e)
if threshold > 10000 or threshold < 4000:
for i, lbl in enumerate(labels):
d2.append({
'x': xtt[lbl],
'y': ytt[lbl],
'marker': markers[i],
'color': colors[i],
'lw': None
})
d2.append({'x': np.array([xtt[0], xtt[-1]]),
'y': np.ones(2)*threshold,
'color': 'k',
'lw': 1
})
#threshold = (np.min(y[x<bound]) + np.max(y[x>=bound]))/2
fig = xana.simplePlot(d, y_label="PC pixel signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot_fail.png".format(out_folder, mod))
fig = xana.simplePlot(d2, y_label="PC gain signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot_gain_fail.png".format(out_folder, mod))
fig = xana.simplePlot(d3, secondpanel=True, y_label="PC signal (ADU)", figsize='2col', aspect=2,
x_label="step #", y2_label="Residuals ($\sigma$)", y2_range=(-5,5))
fig.savefig("{}/module_{}_pixel_plot_fits_fail.png".format(out_folder, mod))
```
%% Cell type:code id: tags:
``` python
# Here we perform the calculations in column-parallel for all modules
def calibrate_single_row(cells, fit_hook, inp):
import numpy as np
from iminuit import Minuit
from iminuit.util import describe, make_func_code
from sklearn.cluster import KMeans
yrd, yra, offset, noise = inp
def rolling_window(a, window):
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
strides = a.strides + (a.strides[-1],)
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
mm = np.zeros_like(m)
mm[...] = np.nan
m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
reg1 = m > r1
reg2 = m < r2
reg3 = (m > r2) & (m < r3)
reg4 = ~(reg1 | reg2 | reg3)
labels = [reg1, reg2, reg3, reg4]
regions = np.zeros_like(x, np.uint8)
for r, lbl in enumerate(labels):
regions[lbl] = r
scan_range = 30
mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
regions[...] = -1
regions[scan_range//2:-scan_range//2+1] = mregions
labels = [regions == 0, regions == 1, regions == 2, regions == 3]
idx = np.arange(x.size)
maxlbl = x.size-1
for i in range(0, len(labels)-1):
nidx = labels[i+1]
if np.any(nidx):
maxlbl = np.max(idx[nidx])
cidx = idx > maxlbl
if np.any(cidx):
labels[i][cidx] = False
ms = []
for lbl in labels:
xl = x[lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, labels, None
def fit_data(fun, x, y, yerr, par_ests):
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 0
f_sig = describe(fun)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x
self.y = y
self.err = err
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
wrapped = _Chi2Functor(fun, x, y, yerr)
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
return m.values
def lin_fun(x, m, b):
return m*x+b
def hook_fun(x, a, c, o, m, b):
return a*np.exp(-(x-o)/c)+m*x+b
# linear slope
ml = np.zeros(yrd.shape[1:])
bl = np.zeros(yrd.shape[1:])
devl = np.zeros(yrd.shape[1:])
ml[...] = np.nan
bl[...] = np.nan
devl[...] = np.nan
#hook function
mh = np.zeros(yrd.shape[1:])
bh = np.zeros(yrd.shape[1:])
ch = np.zeros(yrd.shape[1:])
oh = np.zeros(yrd.shape[1:])
ah = np.zeros(yrd.shape[1:])
devh = np.zeros(yrd.shape[1:])
dhm = np.zeros(yrd.shape[1:])
mh[...] = np.nan
bh[...] = np.nan
ch[...] = np.nan
oh[...] = np.nan
ah[...] = np.nan
devh[...] = np.nan
dhm[...] = np.nan
# threshold
thresh = np.zeros(list(yrd.shape[1:])+[3,])
thresh[...] = np.nan
failures = []
for col in range(yrd.shape[-1]):
try:
y = yrd[:,col]
x = np.arange(y.shape[0])
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = np.min(x[labels[1]])
bound_m = ms[1]
# fit linear slope
xl = x[x<bound]
yl = y[x<bound] - offset[col, 0]
xl = x[x<bound-20]
yl = y[x<bound-20] - offset[col, 0]
errors = np.ones(xl.shape)*noise[col, 0]
if yl.shape[0] != 0:
parms = {'m': bound_m, 'b': np.min(yl)}
fitted = fit_data(lin_fun, xl, yl, errors, parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.median(np.abs((yl-yf)/yl))
ml[col] = fitted['m']
bl[col] = fitted['b']
devl[col] = max_devl
#if np.any(labels[0]) and np.any(labels[2]):
#dhm[col] = y[labels[0]].max()-y[labels[2]].min()
dhml = lin_fun(bound, fitted['m'], fitted['b'])
# fit hook slope
if fit_hook:
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xh = x[idx]
yh = y[idx] - offset[col, 1]
errors = np.ones(xh.shape)*noise[col, 1]
parms = {'m': bound_m/10 if bound_m/10 > 0.3 else 0.5, 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5., 'o': bound-1}
parms["limit_m"] = [0.3, 1.0]
parms["limit_m"] = [0.3, 2.0]
parms["limit_c"] = [1., 1000]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
max_devh = np.median(np.abs((yh-yf)/yh))
mh[col] = fitted['m']
bh[col] = fitted['b']
ah[col] = fitted['a']
oh[col] = fitted['o']
ch[col] = fitted['c']
devh[col] = max_devh
dhm[col] = bound #(dhml) - lin_fun(bound, fitted['m'], fitted['b'])
y = yra[:,col]
x = np.arange(y.shape[0])
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
threshold = (np.mean(y[labels[0]])+np.mean(y[labels[2]]))/2
thresh[col,0] = threshold
thresh[col,1] = np.mean(y[labels[0]])
thresh[col,2] = np.mean(y[labels[2]])
except Exception as e:
print(e)
failures.append((col, str(e)))
del yrd
del yra
return thresh, (ml, bl, devl), (mh, bh, ah, oh, ch, devh), failures, dhm
start = datetime.now()
fres = {}
failures = []
for i, r in zip(modules, res):
offset = offsets[i]
noise = noises[i]
qm = module_index_to_qm(i)
dig, ana, cellId = r
# linear slope
ml = np.zeros(dig.shape[1:])
bl = np.zeros(dig.shape[1:])
devl = np.zeros(dig.shape[1:])
#hook function
mh = np.zeros(dig.shape[1:])
bh = np.zeros(dig.shape[1:])
ch = np.zeros(dig.shape[1:])
oh = np.zeros(dig.shape[1:])
ah = np.zeros(dig.shape[1:])
devh = np.zeros(dig.shape[1:])
dhma = np.zeros(dig.shape[1:])
# threshold
thresh = np.zeros(list(dig.shape[1:]))
thresh_bounds = np.zeros(list(dig.shape[1:])+[2,])
for cell in range(dig.shape[1]):
inp = []
for j in range(dig.shape[2]):
inp.append((dig[:,cell,j,:], ana[:,cell,j,:], offset[j,:,cell,:], noise[j,:,cell,:]))
p = partial(calibrate_single_row, cells, fit_hook)
#print("Running {} tasks in parallel".format(len(inp)))
frs = view.map_sync(p, inp)
#frs = list(map(p, inp))
for j, fr in enumerate(frs):
threshr, lin, hook, fails, dhm = fr
mlr, blr, devlr = lin
mhr, bhr, ahr, ohr, chro, devhr = hook
failures.append(fails)
ml[cell,j,:] = mlr
bl[cell,j,:] = blr
devl[cell,j,:] = devlr
mh[cell,j,:] = mhr
bh[cell,j,:] = bhr
oh[cell,j,:] = ohr
ch[cell,j,:] = chro
ah[cell,j,:] = ahr
devh[cell,j,:] = devhr
dhma[cell,j,:] = dhm
thresh[cell,j,...] = threshr[...,0]
thresh_bounds[cell,j,...] = threshr[...,1:]
fres[qm] = {'ml': ml,
'bl': bl,
'devl': devl,
'tresh': thresh,
'tresh_bounds': thresh_bounds,
'dhm': dhma}
if fit_hook:
fres[qm].update({
'mh': mh,
'bh': bh,
'oh': oh,
'ch': ch,
'ah': ah,
'devh': devh,
})
```
%% Cell type:markdown id: tags:
Results of slope fitting from the PC runs are distinguished on axis 0 by index:
* 0: linear slope - m value
* 1: linear slope - b value
* 2: linear slope - deviation
* 3: hook function - m value
* 4: hook function - b value
* 5: hook function - o value
* 6: hook function - c value
* 7: hook function - a value
* 8: hook function - deviation
* 9: gain threshold
%% Cell type:code id: tags:
``` python
def slope_dict_to_arr(d):
key_to_index = {
"ml": 0,
"bl": 1,
"devl": 2,
"mh": 3,
"bh": 4,
"oh": 5,
"ch": 6,
"ah": 7,
"devh": 8,
"tresh": 9,
}
arr = np.zeros([11]+list(d["ml"].shape), np.float32)
for key, item in d.items():
if key not in key_to_index:
continue
arr[key_to_index[key],...] = item
return arr
```
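%% Cell type:markdown id: tags:
Hypothetical usage of `slope_dict_to_arr`: pack one module's fit results into the array layout listed above and pick out a single parameter map (the preview cell index 4 is arbitrary):
%% Cell type:code id: tags:
``` python
qm = module_index_to_qm(modules[0])
arr = slope_dict_to_arr(fres[qm])  # shape (11, cells, 128, 512)
ml_cell4 = arr[0, 4]               # axis-0 index 0 is the linear-slope m value
print(qm, ml_cell4.shape)
```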
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
bad_pixels = OrderedDict()
for qm, data in fres.items():
mask = np.zeros(data['ml'].shape, np.uint32)
mask[(data['tresh'][...,0] < 50) | (data['tresh'][...,0] > 8500)] |= BadPixels.CI_GAIN_OF_OF_THRESHOLD.value
mask[(data['devl'] == 0)] |= BadPixels.CI_LINEAR_DEVIATION.value
mask[(np.abs(data['devl']) > 0.5)] |= BadPixels.CI_LINEAR_DEVIATION.value
mask[(~np.isfinite(data['devl']))] |= BadPixels.CI_EVAL_ERROR.value
bad_pixels[qm] = mask
```
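%% Cell type:markdown id: tags:
Since the mask is a bit field, individual bad-pixel reasons can be inspected with a bitwise AND; a short sketch:
%% Cell type:code id: tags:
``` python
for qm, mask in bad_pixels.items():
    n_dev = np.count_nonzero(mask & BadPixels.CI_LINEAR_DEVIATION.value)
    n_err = np.count_nonzero(mask & BadPixels.CI_EVAL_ERROR.value)
    print(f"{qm}: {n_dev} flagged CI_LINEAR_DEVIATION, {n_err} flagged CI_EVAL_ERROR")
```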
%% Cell type:code id: tags:
``` python
if local_output:
ofile = "{}/agipd_pc_store_{}_{}_{}.h5".format(out_folder, "_".join([str(run) for run in runs]), modules[0], modules[-1])
store_file = h5py.File(ofile, "w")
for qm, r in fres.items():
for key, item in r.items():
store_file["/{}/{}/0/data".format(qm, key)] = item
#arr = slope_dict_to_arr(r)
#store_file["/{}/SlopesPC/0/data".format(qm)] = arr
store_file["/{}/{}/0/data".format(qm, "BadPixelsPC")] = bad_pixels[qm]
store_file.close()
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))
report = get_report(out_folder)
```
%% Cell type:code id: tags:
``` python
md = None
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=maxcells, bias_voltage=bias_voltage,
acquisition_rate=acq_rate, gain_setting=gain_setting)
db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesPC(),
condition, cal_db_interface,
snapshot_at=creation_time)
for pdu, (qm, r) in zip(db_modules, fres.items()):
for const in ["SlopesPC", "BadPixelsPC"]:
dbconst = getattr(Constants.AGIPD, const)()
if const == "SlopesPC":
dbconst.data = slope_dict_to_arr(r)
else:
dbconst.data = bad_pixels[qm]
if db_output:
md = send_to_db(pdu, karabo_id, dbconst, condition,
file_loc, report, cal_db_interface,
creation_time=creation_time)
# TODO: check if this can replace other written function of this notebook.
#if local_output:
# md = save_const_to_h5(pdu, karabo_id, dconst, condition, dconst.data,
# file_loc, report, creation_time, out_folder)
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {maxcells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• integration_time: {integration_time}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Overview Plots ##
Each of the following plots represents one of the fit parameters of a single preview memory cell on a module:
For the linear function of the high gain region
$$y = mx + b$$
* ml denotes the $m$ parameter
* bl denotes the $b$ parameter
* devl denotes the absolute relative deviation from linearity.
For the composite function of the medium gain and transition region
$$y = A e^{-(x-O)/C} + mx + b$$
* oh denotes the $O$ parameter
* ch denotes the $C$ parameter
* mh denotes the $m$ parameter
* bh denotes the $b$ parameter
* devh denotes the absolute relative deviation from the linear part of the function.
Additionally, the thresholds and bad pixels (mask) are shown.
Finally, the red and white rectangles indicate the first and second pixel ranges.
%% Cell type:code id: tags:
``` python
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
cell_to_preview = min(59, mem_cells-1)
for module, data in fres.items():
fig = plt.figure(figsize=(20,20))
grid = AxesGrid(fig, 111,
nrows_ncols=(7 if fit_hook else 3, 2),
axes_pad=(0.9, 0.15),
label_mode="1",
share_all=True,
cbar_location="right",
cbar_mode="each",
cbar_size="7%",
cbar_pad="2%",
)
mask = bad_pixels[module]
i = 0
for key, citem in data.items():
item = citem.copy()
item[~np.isfinite(item)] = 0
med = np.nanmedian(item)
bound = 0.1
maxcnt = 10
if med < 0:
bound = -bound
while(np.count_nonzero((item < med-bound*med) | (item > med+bound*med))/item.size > 0.01):
bound *=2
maxcnt -= 1
if maxcnt < 0:
break
if "bounds" in key:
d = item[cell_to_preview,...,0]
im = grid[i].imshow(d, interpolation="nearest",
vmin=med-bound*med, vmax=med+bound*med)
else:
d = item[cell_to_preview,...]
im = grid[i].imshow(d, interpolation="nearest",
vmin=med-bound*med, vmax=med+bound*med)
cb = grid.cbar_axes[i].colorbar(im)
# axes coordinates are 0,0 is bottom left and 1,1 is upper right
x0, x1 = tpix_range1[0][0], tpix_range1[0][1]
y0, y1 = tpix_range1[1][0], tpix_range1[1][1]
p = patches.Rectangle(
(x0, y0), x1-x0, y1-y0, fill=False, color="red")
grid[i].add_patch(p)
x0, x1 = tpix_range2[0][0], tpix_range2[0][1]
y0, y1 = tpix_range2[1][0], tpix_range2[1][1]
p = patches.Rectangle(
(x0, y0), x1-x0, y1-y0, fill=False, color="white")
grid[i].add_patch(p)
grid[i].text(20, 50, key, color="w", fontsize=50)
i += 1
im = grid[-1].imshow(mask[cell_to_preview,...], interpolation="nearest",
vmin=0, vmax=1)
cb = grid.cbar_axes[-1].colorbar(im)
grid[-1].text(20, 50, "mask", color="w", fontsize=50)
fig.savefig("{}/module_{}_PC.png".format(out_folder, module))
```
%% Cell type:markdown id: tags:
### Memory Cell dependent behavior of thresholding ###
%% Cell type:code id: tags:
``` python
# ratio of the high-gain to medium-gain slope per module ('mh' exists only if fit_hook)
for qm in fres:
    if 'mh' in fres[qm]:
        fres[qm]['mltomh'] = fres[qm]['ml'] / fres[qm]['mh']
toplot = {"tresh": "Gain threshold (ADU)",
          "ml": "Slope (HG)",
          "bl": "Offset (HG) (ADU)",
          "mh": "Slope (MG)",
          "bh": "Offset (MG) (ADU)",
          "mltomh": "Ratio slope_HG/slope_MG"}
from matplotlib.colors import LogNorm, PowerNorm
for module, data in fres.items():
bins = 100
for typ, label in toplot.items():
r_hist = np.zeros((mem_cells, bins))
mask = bad_pixels[module]
thresh = data[typ]
hrange = [0.5*np.nanmedian(thresh), 1.5*np.nanmedian(thresh)]
if hrange[1] < hrange[0]:
hrange = hrange[::-1]
for c in range(mem_cells):
d = thresh[c,...]
h, e = np.histogram(d.flatten(), bins=bins, range=hrange)
r_hist[c, :] = h
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(111)
im = ax.imshow(r_hist[:,:].T[::-1,:], interpolation="nearest",
aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(r_hist)),
extent=[0, mem_cells, hrange[0], hrange[1]])
ax.set_xlabel("Memory cell")
ax.set_ylabel(label)
cb = fig.colorbar(im)
cb.set_label("Counts")
#fig.savefig("/gpfs/exfel/data/scratch/haufs/test/agipd_gain_threholds.pdf", bbox_inches="tight")
```
%% Cell type:markdown id: tags:
## Global Bad Pixel Behaviour ##
The following plots show the results of the bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2. This excludes isolated single bad pixels. Hence, any bad pixels spanning at least 2 pixels in the x/y-plane, or across at least two memory cells, are indicated. Colors encode the bad pixel type, or mixed type.
%% Cell type:code id: tags:
``` python
cols = {BadPixels.CI_GAIN_OF_OF_THRESHOLD.value: (BadPixels.CI_GAIN_OF_OF_THRESHOLD.name, '#FF000080'),
BadPixels.CI_EVAL_ERROR.value: (BadPixels.CI_EVAL_ERROR.name, '#0000FF80'),
BadPixels.CI_GAIN_OF_OF_THRESHOLD.value | BadPixels.OFFSET_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
rebin = 2 if not high_res_badpix_3d else 1
gain = 0
for mod, data in bad_pixels.items():
plot_badpix_3d(np.moveaxis(data, 0, 2), cols, title=mod, rebin_fac=rebin, azim=60.)
```
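%% Cell type:markdown id: tags:
The next cell rescales the PC response to photon units. For the transition region, the hook contribution is removed by inverting the exponential part of the composite function: assuming the exponential term dominates, $y - o \approx A e^{-(x-O)/C}$, so the scan position is recovered as $x \approx C\ln\left(A/(y-o)\right) + O$, which is what the `tx` expression in the code computes.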
%% Cell type:code id: tags:
``` python
one_photon = 55 # ADU
test_pixels = []
tpix_range1 = [(0,8), (0,8)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38, 64, 128, 200, 249]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
from mpl_toolkits.axes_grid1 import ImageGrid
for mod, r in zip(modules, res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
H = [0, 0, 0, 0]
H2 = [0, 0, 0, 0]
Ha = [0, 0, 0, 0]
qm = module_index_to_qm(mod)
cdata = fres[qm]
ex, ey, ea = None, None, None
medml = np.nanmean(cdata['ml'])
medmh = np.nanmean(cdata['mh'][cdata['mh']> 0.5])
offset = offsets[mod]
threshold = thresholds[mod]
medth = np.nanmean(threshold[...,0])
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
a = ana[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
a = a[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
ymin = y.min()
amin = a[labels[2]].min()
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)
o = offset[pix[0], pix[1], cell, 0]
ym = (y[lbl]-o)/cm
elif i >= 1:
mh = cdata['mh'][cell, pix[0], pix[1]]
ml = cdata['ml'][cell, pix[0], pix[1]]
cml = ml/medml
cmh = mh/medmh
cm = medml/medmh
oh = cdata['bh'][cell, pix[0], pix[1]]
o = offset[pix[0], pix[1], cell, 1] + oh
ym = (y[lbl]-o)/cmh*cm
if i == 1:
ah = cdata['ah'][cell, pix[0], pix[1]]
ch = cdata['ch'][cell, pix[0], pix[1]]
ohh = cdata['oh'][cell, pix[0], pix[1]]
tx = ch * np.log(ah/(y[lbl]-o))+ohh
chook = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
ym -= chook
h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
H[i] += h
labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)
o = offset[pix[0], pix[1], cell, 0]
ym = (y[lbl]-o)/cm
elif i >= 1:
mh = cdata['mh'][cell, pix[0], pix[1]]
ml = cdata['ml'][cell, pix[0], pix[1]]
cml = ml/medml
cmh = mh/medmh
cm = medml/medmh
oh = cdata['bh'][cell, pix[0], pix[1]]
o = offset[pix[0], pix[1], cell, 1] + oh
ym = (y[lbl]-o)/cmh*cm
if i == 1:
ah = cdata['ah'][cell, pix[0], pix[1]]
ch = cdata['ch'][cell, pix[0], pix[1]]
ohh = cdata['oh'][cell, pix[0], pix[1]]
tx = ch * np.log(ah/(y[lbl]-o))+ohh
chook = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
idx = (a[lbl]-amin) < 0
ym[idx] -= chook[idx]
#ym = a[lbl]-amin
h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
H2[i] += h
labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
for i, lbl in enumerate(labels):
if np.any(lbl):
#if i == 0:
# amin = a[lbl].min()
#else:
# amin = a[labels[0]].min() #a[labels[1]].min()# /(threshold[pix[0], pix[1], cell,0]/medth)
am = a[lbl] - amin
h, ex, ea = np.histogram2d(x[lbl], am, range=((0, 600), (-100, 5000)), bins=(300, 400))
Ha[i] += h
fig = plt.figure(figsize=(10,15))
ax = fig.add_subplot(311)
for i in range(3):
H[i][H[i]==0] = np.nan
ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)")
ax.set_xlabel("PC scan point (#)")
x = np.arange(0, 600)
ideal = medml*x/one_photon
ax.plot(x, ideal, color='red')
ax.plot(x, ideal + np.sqrt(ideal), color='red')
ax.plot(x, ideal - np.sqrt(ideal), color='red')
ax = fig.add_subplot(312)
for i in range(2):
H2[i][H2[i]==0] = np.nan
ax.imshow(H2[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H2[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)")
ax.set_xlabel("PC scan point (#)")
x = np.arange(0, 600)
ideal = medml*x/one_photon
ax.plot(x, ideal, color='red')
ax.plot(x, ideal + np.sqrt(ideal), color='red')
ax.plot(x, ideal - np.sqrt(ideal), color='red')
ax = fig.add_subplot(313)
for i in range(2):
Ha[i][Ha[i]==0] = np.nan
ax.imshow(Ha[0].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
#ax.imshow(Ha[1].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
# aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(Ha[1].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD gain (ADU)")
ax.set_xlabel("PC scan point (#)")
```
%% Cell type:markdown id: tags:
# DSSC Characterize Dark Images #
Author: S. Hauf, Version: 0.1
The following code analyzes a set of dark images taken with the DSSC detector to deduce detector offsets and noise. Data for the detector is presented in one run, as the DSSC does not acquire multiple gain stages.
The notebook explicitly does what pyDetLib provides in its offset calculation method for streaming data.
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/exp/SCS/202031/p900170/raw" # path to input data, required
in_folder = "/gpfs/exfel/exp/SQS/202131/p900210/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/samartse/data/DSSC" # path to output to, required
sequences = [0] # sequence files to evaluate.
modules = [-1] # modules to run for
run = 20 #run number in which data was recorded, required
karabo_id = "SCS_DET_DSSC1M-1" # karabo karabo_id
karabo_id = "SQS_DET_DSSC1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregator names, default ['-1'] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to the image index data
slow_data_pattern = 'RAW-R{}-DA{}-S00000.h5'
use_dir_creation_date = True # use the dir creation date for determining the creation time
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 100 # detector bias voltage
rawversion = 2 # RAW file format version
thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
thresholds_offset_hard = [4, 125] # thresholds in absolute ADU terms for offset deduced bad pixels,
# minimal threshold at 4 is set at hardware level, DSSC full range 0-511
thresholds_noise_sigma = 3. # thresholds in terms of n sigma noise for noise deduced bad pixels
thresholds_noise_hard = [0.001, 3] # thresholds in absolute ADU terms for noise deduced bad pixels
offset_numpy_algorithm = "mean"
instrument = "SCS" # the instrument
instrument = "SQS" # the instrument
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
slow_data_aggregators = [1,2,3,4] # quadrant/aggregator
slow_data_path = 'SQS_NQS_DSSC/FPGA/PPT_Q'
operation_mode = '' # Detector operation mode, optional
```
%% Cell type:code id: tags:
``` python
import os
import warnings
# imports and things that do not usually need to be changed
from datetime import datetime
warnings.filterwarnings('ignore')
from collections import OrderedDict
import h5py
import matplotlib
from ipyparallel import Client
from IPython.display import Latex, Markdown, display
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from iCalibrationDB import Conditions, Constants, Detectors, Versions
from cal_tools.dssclib import get_dssc_ctrl_data, get_pulseid_checksum
from cal_tools.enums import BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_notebook_name,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
parse_runs,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
view = Client(profile=cluster_profile)[:]
view.use_dill()
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
gain_names = ['High', 'Medium', 'Low']
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ["DSSC{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = run
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
dinstance = "DSSC1M1"
print(f"Detector in use is {karabo_id}")
cal_db_interface = get_random_db_interface(cal_db_interface)
```
%% Cell type:code id: tags:
``` python
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}/{max_cells}")
print("Runs: {}".format([ v for v in offset_runs.values()]))
print(f"Sequences: {sequences}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
file_loc = f'proposal:{prop} runs:{[ v for v in offset_runs.values()][0]}'
report = get_report(out_folder)
```
%% Cell type:markdown id: tags:
The following lines create a queue of files which will then be executed module-parallel, distinguishing between different gains.
%% Cell type:code id: tags:
``` python
# set everything up filewise
os.makedirs(out_folder, exist_ok=True)
gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
gain_mapped_files, total_sequences, total_file_size = gmf
print(f"Will process a total of {total_sequences} file.")
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per-pixel and per-memory-cell. Offsets are the mean (or median, depending on `offset_numpy_algorithm`) value for a set of dark data, and the noise is the corresponding standard deviation.
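A minimal sketch of this reduction on toy data (all shapes and values below are made up; the real code additionally derives bad-pixel masks):
%% Cell type:code id: tags:
``` python
import numpy as np

rng = np.random.default_rng(0)
im = rng.normal(50, 3, size=(64, 64, 400)).astype(np.float32)  # (x, y, frames)
cellIds = np.tile(np.arange(100), 4)                           # 100 cells, 4 trains

offset = np.zeros((64, 64, 100))
noise = np.zeros((64, 64, 100))
for cc in np.unique(cellIds):
    idx = cellIds == cc
    offset[..., cc] = np.mean(im[..., idx], axis=2)  # or np.median, per offset_numpy_algorithm
    noise[..., cc] = np.std(im[..., idx], axis=2)
```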
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def characterize_module(cells, bp_thresh, rawversion, karabo_id, h5path, h5path_idx, inp):
import copy
import h5py
import numpy as np
from cal_tools.enums import BadPixels
def get_num_cells(fname, h5path):
with h5py.File(fname, "r") as f:
cells = f[f"{h5path}/cellId"][()]
            if cells.size == 0:  # no image frames recorded in this file
return
maxcell = np.max(cells)
options = [100, 200, 400, 500, 600, 700, 800]
dists = np.array([(o-maxcell) for o in options])
dists[dists<0] = 10000 # assure to always go higher
return options[np.argmin(dists)]
filename, channel = inp
h5path = h5path.format(channel)
h5path_idx = h5path_idx.format(channel)
if cells == 0:
cells = get_num_cells(filename, h5path)
if cells is None:
raise ValueError(f"ERROR! Empty image data file for channel {channel}")
print(f"Using {cells} memory cells")
pulseid_checksum = None
thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh
infile = h5py.File(filename, "r", driver="core")
infile = h5py.File(filename, "r")
if rawversion == 2:
count = np.squeeze(infile[f"{h5path_idx}/count"])
first = np.squeeze(infile[f"{h5path_idx}/first"])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile[f"{h5path_idx}/status"])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile[f"{h5path_idx}/last"])
first = np.squeeze(infile[f"{h5path_idx}/first"])
last_index = int(last[status != 0][-1]) + 1
first_index = int(first[status != 0][0])
im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])
infile.close()
pulseid_checksum = get_pulseid_checksum(filename, h5path, h5path_idx)
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
mcells = cells
offset = np.zeros((im.shape[0], im.shape[1], mcells), dtype = np.float64)
noise = np.zeros((im.shape[0], im.shape[1], mcells), dtype = np.float64)
for cc in np.unique(cellIds[cellIds < mcells]):
cellidx = cellIds == cc
if offset_numpy_algorithm == "mean":
offset[...,cc] = np.mean(im[..., cellidx], axis=2)
else:
offset[...,cc] = np.median(im[..., cellidx], axis=2)
noise[...,cc] = np.std(im[..., cellidx], axis=2)
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
return offset, noise, bp, cells, pulseid_checksum
offset_g = OrderedDict()
noise_g = OrderedDict()
gain_g = OrderedDict()
badpix_g = OrderedDict()
gg = 0
start = datetime.now()
all_cells = []
checksums = {}
try:
tGain, encodedGain, operatingFreq = get_dssc_ctrl_data(in_folder + "/r{:04d}/".format(offset_runs["high"]),
slow_data_pattern,
slow_data_aggregators,
offset_runs["high"])
offset_runs["high"], slow_data_path)
except IOError:
print("ERROR: Couldn't access slow data to read tGain, encodedGain, and operatingFreq \n")
for gain, mapped_files in gain_mapped_files.items():
inp = []
dones = []
for i in modules:
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = mapped_files[qm].get()
print("Process file: ", fname_in)
dones.append(mapped_files[qm].empty())
else:
continue
inp.append((fname_in, i))
p = partial(characterize_module, max_cells,
(thresholds_offset_hard, thresholds_offset_sigma,
thresholds_noise_hard, thresholds_noise_sigma), rawversion, karabo_id, h5path, h5path_idx)
results = list(map(p, inp))
for ii, r in enumerate(results):
i = modules[ii]
offset, noise, bp, thiscell, pulseid_checksum = r
all_cells.append(thiscell)
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2]))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
checksums[qm] = pulseid_checksum
offset_g[qm][...] = offset
noise_g[qm][...] = noise
badpix_g[qm][...] = bp
gg +=1
if len(all_cells) > 0:
max_cells = np.max(all_cells)
print(f"Using {max_cells} memory cells")
else:
raise ValueError("0 processed memory cells. No raw data available.")
```
%% Cell type:code id: tags:
``` python
# TODO: add db_module when received from myMDC
# Create the modules dict of karabo_das and PDUs
qm_dict = OrderedDict()
for i, k_da in zip(modules, karabo_da):
qm = f"Q{i//4+1}M{i%4+1}"
qm_dict[qm] = {"karabo_da": k_da,
"db_module": ""}
```
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise"]
old_const = {}
old_mdata = {}
print('Retrieve pre-existing constants for comparison.')
for qm in offset_g.keys():
old_const[qm] = {}
old_mdata[qm] = {}
qm_db = qm_dict[qm]
karabo_da = qm_db["karabo_da"]
for const in clist:
        dconst = getattr(Constants.DSSC, const)()
condition = Conditions.Dark.DSSC(memory_cells=max_cells,
bias_voltage=bias_voltage,
pulseid_checksum=checksums[qm],
acquisition_rate=operatingFreq[qm],
target_gain=tGain[qm],
encoded_gain=encodedGain[qm])
# This should be used in case of running notebook
# by a different method other than myMDC which already
# sends CalCat info.
# TODO: Set db_module to "" by default in the first cell
if not qm_db["db_module"]:
qm_db["db_module"] = get_pdu_from_db(karabo_id, karabo_da, dconst,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
data, mdata = get_from_db(karabo_id, karabo_da,
dconst,
condition,
None,
cal_db_interface, creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[qm][const] = data
if mdata is None or data is None:
old_mdata[qm][const] = {
"timestamp": "Not found",
"filepath": None,
"h5path": None
}
else:
old_mdata[qm][const] = {
"timestamp": mdata.calibration_constant_version.begin_at.isoformat(),
"filepath": os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename,
),
"h5path": mdata.calibration_constant_version.h5path,
}
with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
yaml.safe_dump(
{"module": qm, "pdu": qm_db["db_module"], "old-constants": old_mdata[qm]},
fd,
)
```
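%% Cell type:markdown id: tags:
The per-module YAML written above can be read back later, e.g. by a summary notebook; a short sketch:
%% Cell type:code id: tags:
``` python
import yaml

with open(f"{out_folder}/module_metadata_{qm}.yml") as fd:
    meta = yaml.safe_load(fd)
print(meta["module"], meta["pdu"], list(meta["old-constants"]))
```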
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = f"Q{i//4+1}M{i%4+1}"
try:
res[qm] = {'Offset': offset_g[qm],
'Noise': noise_g[qm],
}
except Exception as e:
print(f"Error: No constants for {qm}: {e}")
```
%% Cell type:code id: tags:
``` python
# Push the same constant twice:
# once with the generated pulseID checksum setting for the offline calibration,
# and once for the online calibration, which doesn't have the pulseID checksum yet.
md = None
for dont_use_pulseIds in [True, False]:
for qm in res.keys():
karabo_da = qm_dict[qm]["karabo_da"]
db_module = qm_dict[qm]["db_module"]
for const in res[qm].keys():
dconst = getattr(Constants.DSSC, const)()
dconst.data = res[qm][const]
opfreq = None if dont_use_pulseIds else operatingFreq[qm]
targetgain = None if dont_use_pulseIds else tGain[qm]
encodedgain = None if dont_use_pulseIds else encodedGain[qm]
pidsum = None if dont_use_pulseIds else checksums[qm]
# set the operating condition
condition = Conditions.Dark.DSSC(memory_cells=max_cells,
bias_voltage=bias_voltage,
pulseid_checksum=pidsum,
acquisition_rate=opfreq,
target_gain=targetgain,
encoded_gain=encodedgain)
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition, file_loc, report,
cal_db_interface, creation_time=creation_time, timeout=cal_db_timeout)
if local_output and dont_use_pulseIds: # Don't save the constant locally twice.
md = save_const_to_h5(db_module, karabo_id, dconst, condition,
dconst.data, file_loc, report,
creation_time, out_folder)
print(f"Calibration constant {const} is stored locally.\n")
if not dont_use_pulseIds:
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
f"• pulseid_checksum: {pidsum}\n• acquisition_rate: {opfreq}\n"
f"• target_gain: {targetgain}\n• encoded_gain: {encodedgain}\n"
f"• creation_time: {creation_time}\n")
```
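%% Cell type:markdown id: tags:
The pulse-ID checksum entering the offline condition above is computed by `cal_tools.dssclib.get_pulseid_checksum`. As a rough illustration of the idea only (the actual implementation may differ), one could digest the pulse-ID pattern of the first train with blake2b; the function name and digest size below are hypothetical:
%% Cell type:code id: tags:
``` python
from hashlib import blake2b

import h5py
import numpy as np


def sketch_pulseid_checksum(fname, h5path, h5path_idx):
    """Hypothetical sketch: hash the pulse-ID pattern of the first train."""
    with h5py.File(fname, "r") as f:
        first = int(np.squeeze(f[f"{h5path_idx}/first"])[0])
        count = int(np.squeeze(f[f"{h5path_idx}/count"])[0])
        pulse_ids = np.asarray(f[f"{h5path}/pulseId"][first:first + count])
    return blake2b(pulse_ids.tobytes(), digest_size=8).hexdigest()
```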
%% Cell type:code id: tags:
``` python
mnames = []
for i in modules:
qm = f"Q{i//4+1}M{i % 4+1}"
display(Markdown(f'## Position of the module {qm} and its ASICs ##'))
mnames.append(qm)
show_processed_modules(dinstance=dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identification of potential effects on all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check on expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while no other immediate artifacts are visible.
%% Cell type:code id: tags:
``` python
cell = 9
gain = 0
out_folder = None
show_overview(res, cell, gain, out_folder=out_folder, infix="_{}".format(run))
```
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned by a factor of 2.
This excludes isolated single bad pixels.
Hence, any bad pixel region spanning at least 4 pixels in the x/y-plane, or at least two memory cells, is indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# Set rebin_fac to 1 to avoid rebinning and
# losing the real bad pixel values (high resolution).
gain = 0
for mod, data in badpix_g.items():
plot_badpix_3d(data, cols, title=mod, rebin_fac=2)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values and per-cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behaviour.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", max_cells, entries=1)
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100, entries=1)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
bad_pixel_aggregate_g[m] = d.astype(bool).astype(np.float64)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, entries=1)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
time_summary = []
for qm, qm_data in old_mdata.items():
time_summary.append(f"The following pre-existing constants are used for comparison for module {qm}:")
for const, const_data in qm_data.items():
time_summary.append(f"- {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain']]
for qm in res.keys():
data = np.copy(res[qm][const])
if old_const[qm][const] is not None:
dataold = np.copy(old_const[qm][const])
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[qm][const] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good and bad pixels ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# DSSC Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the DSSC Detector
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/exp/SCS/202031/p900170/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/samartse/test/DSSC" # path to output to, required
in_folder = "/gpfs/exfel/exp/SQS/202131/p900210/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/samartse/data/DSSC" # path to output to, required
sequences = [-1] # sequence files to evaluate.
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 20 # runs to process, required
karabo_id = "SQS_DET_DSSC1M-1" # karabo id of the detector
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to image indices
slow_data_pattern = 'RAW-R{}-DA{}-S00000.h5'
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8020#8025" # the database interface to use
cal_db_timeout = 300000 # in milliseconds
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
overwrite = True # set to True if existing data should be overwritten
max_pulses = 800 # maximum number of pulses per train
bias_voltage = 100 # detector bias voltage
sequences_per_node = 1 # number of sequence files per cluster node if run as a SLURM job, set to 0 to not run in parallel via SLURM
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
mask_noisy_asic = 0.25 # set to a value other than 0 and below 1 to mask entire ADC if fraction of noisy pixels is above
mask_cold_asic = 0.25 # mask cold ASICs if the fraction of pixels with negligible standard deviation is larger than this
noisy_pix_threshold = 1. # threshold above which a pixel is considered noisy
geo_file = "/gpfs/exfel/data/scratch/xcal/dssc_geo_june19.h5" # detector geometry file
dinstance = "DSSC1M1"
slow_data_aggregators = [1,2,3,4] #quadrant/aggregator
slow_data_path = 'SQS_NQS_DSSC/FPGA/PPT_Q'
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import sys
from collections import OrderedDict
import h5py
import matplotlib
import numpy as np
matplotlib.use("agg")
import matplotlib.pyplot as plt
from ipyparallel import Client
from IPython.display import Latex, Markdown, display
print(f"Connecting to profile {cluster_profile}")
view = Client(profile=cluster_profile)[:]
view.use_dill()
from datetime import timedelta
from cal_tools.dssclib import get_dssc_ctrl_data, get_pulseid_checksum
from cal_tools.tools import (
get_constant_from_db,
get_dir_creation_date,
get_notebook_name,
map_modules_from_folder,
parse_runs,
run_prop_seq_from_path,
)
from dateutil import parser
from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions
```
%% Cell type:code id: tags:
``` python
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if sequences[0] == -1:
sequences = None
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ["DSSC{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules: ",
', '.join([f"Q{x // 4 + 1}M{x % 4 + 1}" for x in modules]))
CHUNK_SIZE = 512
MAX_PAR = 32
if in_folder[-1] == "/":
in_folder = in_folder[:-1]
print(f"Outputting to {out_folder}")
if not os.path.exists(out_folder):
os.makedirs(out_folder)
elif not overwrite:
raise AttributeError("Output path exists! Exiting")
import warnings
warnings.filterwarnings('ignore')
print(f"Detector in use is {karabo_id}")
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, file_size = mmf
MAX_PAR = min(MAX_PAR, total_sequences)
```
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
import tabulate
from IPython.display import HTML, Latex, Markdown, display
print(f"Processing a total of {total_sequences} sequence files in chunks of {MAX_PAR}")
table = []
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
i = 0
while not files.empty():
f = files.get()
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
if len(table):
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, file_size = mmf
```
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(total_sequences, sequences_qm, karabo_id, dinstance, mask_noisy_asic,
mask_cold_asic, noisy_pix_threshold, chunksize, mem_cells, bias_voltage,
cal_db_timeout, creation_time, cal_db_interface, h5path, h5path_idx, inp):
import binascii
import copy
import struct
from hashlib import blake2b
import h5py
import numpy as np
from cal_tools.dssclib import get_dssc_ctrl_data, get_pulseid_checksum
from cal_tools.enums import BadPixels
from cal_tools.tools import get_constant_from_db_and_time
from iCalibrationDB import (
Conditions,
ConstantMetaData,
Constants,
Detectors,
Versions,
)
filename, filename_out, channel, karabo_da, qm, conditions = inp
# DSSC correction requires path without the leading "/"
if h5path[0] == '/':
h5path = h5path[1:]
if h5path_idx[0] == '/':
h5path_idx = h5path_idx[1:]
h5path = h5path.format(channel)
h5path_idx = h5path_idx.format(channel)
low_edges = None
hists_signal_low = None
high_edges = None
hists_signal_high = None
pulse_edges = None
err = None
offset_not_found = False
def get_num_cells(fname, h5path):
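# Infer the memory-cell count: pick the smallest standard DSSC cell count
# (from the options below) that is >= the largest cellId seen in the file.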
with h5py.File(fname, "r") as f:
cells = f[f"{h5path}/cellId"][()]
maxcell = np.max(cells)
options = [100, 200, 400, 500, 600, 700, 800]
dists = np.array([(o-maxcell) for o in options])
dists[dists<0] = 10000 # assure to always go higher
return options[np.argmin(dists)]
if mem_cells == 0:
mem_cells = get_num_cells(filename, h5path)
pulseid_checksum = get_pulseid_checksum(filename, h5path, h5path_idx)
print(f"Memcells: {mem_cells}")
condition = Conditions.Dark.DSSC(bias_voltage=bias_voltage, memory_cells=mem_cells,\
pulseid_checksum=pulseid_checksum,\
acquisition_rate=conditions['acquisition_rate'],\
target_gain=conditions['target_gain'],\
encoded_gain=conditions['encoded_gain'])
detinst = getattr(Detectors, dinstance)
device = getattr(detinst, qm)
with h5py.File(filename, "r", driver="core") as infile:
y = infile[f"{h5path}/data"].shape[2]
x = infile[f"{h5path}/data"].shape[3]
offset, when = get_constant_from_db_and_time(karabo_id, karabo_da,
Constants.DSSC.Offset(),
condition,
None,
cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if offset is not None:
offset = np.moveaxis(np.moveaxis(offset[...], 2, 0), 2, 1)
else:
offset_not_found = True
print("No offset found in the database")
def copy_and_sanitize_non_cal_data(infile, outfile):
# these are modified by the correction function, do not copy them here
dont_copy = ["data"]
dont_copy = [h5path + "/{}".format(do)
for do in dont_copy]
# a visitor to copy everything else
def visitor(k, item):
if k not in dont_copy:
if isinstance(item, h5py.Group):
outfile.create_group(k)
elif isinstance(item, h5py.Dataset):
group = str(k).split("/")
group = "/".join(group[:-1])
infile.copy(k, outfile[group])
infile.visititems(visitor)
try:
with h5py.File(filename, "r", driver="core") as infile:
with h5py.File(filename_out, "w") as outfile:
copy_and_sanitize_non_cal_data(infile, outfile)
# get indices of last images in each train
first_arr = np.squeeze(infile[f"{h5path_idx}/first"]).astype(np.int64)
last_arr = np.concatenate((first_arr[1:], np.array([-1,]))).astype(np.int64)
assert first_arr.size == last_arr.size
oshape = list(infile[f"{h5path}/data"].shape)
if len(oshape) == 4:
oshape = [oshape[0],]+oshape[2:]
chunks = (chunksize, oshape[1], oshape[2])
ddset = outfile.create_dataset(f"{h5path}/data",
oshape, chunks=chunks,
dtype=np.float32,
fletcher32=True)
mdset = outfile.create_dataset(f"{h5path}/mask",
oshape, chunks=chunks,
dtype=np.uint32,
compression="gzip",
compression_opts=1,
shuffle=True,
fletcher32=True)
for train in range(first_arr.size):
first = first_arr[train]
last = last_arr[train]
if first == last:
continue
data = np.squeeze(infile[f"{h5path}/data"][first:last, ...].astype(np.float32))
cellId = np.squeeze(infile[f"{h5path}/cellId"][first:last, ...])
pulseId = np.squeeze(infile[f"{h5path}/pulseId"][first:last, ...])
if not offset_not_found:
data[...] -= offset[cellId,...]
if hists_signal_low is None:
pulseId = np.repeat(pulseId[:, None], data.shape[1], axis=1)
pulseId = np.repeat(pulseId[:,:,None], data.shape[2], axis=2)
bins = (55, int(pulseId.max()))
rnge = [[-5, 50], [0, int(pulseId.max())]]
hists_signal_low, low_edges, pulse_edges = np.histogram2d(data.flatten(),
pulseId.flatten(),
bins=bins,
range=rnge)
rnge = [[-5, 300], [0, pulseId.max()]]
hists_signal_high, high_edges, _ = np.histogram2d(data.flatten(),
pulseId.flatten(),
bins=bins,
range=rnge)
ddset[first:last, ...] = data
# find static and noisy values in dark images
data = infile[f"{h5path}/data"][last, ...].astype(np.float32)
bpix = np.zeros(oshape[1:], np.uint32)
dark_std = np.std(data, axis=0)
bpix[dark_std > noisy_pix_threshold] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
for i in range(8):
for j in range(2):
count_noise = np.count_nonzero(bpix[i*64:(i+1)*64, j*64:(j+1)*64])
asic_std = np.std(data[:, i*64:(i+1)*64, j*64:(j+1)*64])
if mask_noisy_asic:
if count_noise/(64*64) > mask_noisy_asic:
bpix[i*64:(i+1)*64, j*64:(j+1)*64] = BadPixels.NOISY_ADC.value
if mask_cold_asic:
count_cold = np.count_nonzero(asic_std < 0.5)
if count_cold/(64*64) > mask_cold_asic:
bpix[i*64:(i+1)*64, j*64:(j+1)*64] = BadPixels.ASIC_STD_BELOW_NOISE.value
except Exception as e:
print(e)
success = False
reason = "Error"
err = e
if err is None and offset_not_found:
err = "Offset not found in database!. No offset correction applied."
return (hists_signal_low, hists_signal_high, low_edges, high_edges, pulse_edges, when, qm, err)
done = False
first_files = {}
inp = []
left = total_sequences
hists_signal_low = 0
hists_signal_high = 0
low_edges, high_edges, pulse_edges = None, None, None
tGain, encodedGain, operatingFreq = get_dssc_ctrl_data(
    in_folder + "/r{:04d}/".format(run),
    slow_data_pattern, slow_data_aggregators, run, slow_data_path)
whens = []
qms = []
Errors = []
while not done:
dones = []
for i, k_da in zip(modules, karabo_da):
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files:
if not mapped_files[qm].empty():
fname_in = str(mapped_files[qm].get())
dones.append(mapped_files[qm].empty())
else:
print(f"{qm} file is missing")
continue
else:
print(f"Skipping {qm}")
continue
fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
first_files[i] = (fname_in, fout)
conditions = {}
conditions['acquisition_rate'] = operatingFreq[qm]
conditions['target_gain'] = tGain[qm]
conditions['encoded_gain'] = encodedGain[qm]
inp.append((fname_in, fout, i, k_da, qm, conditions))
if len(inp) >= min(MAX_PAR, left):
print(f"Running {len(inp)} tasks parallel")
p = partial(correct_module, total_sequences, sequences_qm,
karabo_id, dinstance, mask_noisy_asic, mask_cold_asic,
noisy_pix_threshold, chunk_size_idim, mem_cells,
bias_voltage, cal_db_timeout, creation_time, cal_db_interface,
h5path, h5path_idx)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
inp = []
left -= MAX_PAR
for rr in r:
if rr is not None:
hl, hh, low_edges, high_edges, pulse_edges, when, qm, err = rr
whens.append(when)
qms.append(qm)
Errors.append(err)
if hl is not None: # any one being None will also make the others None
hists_signal_low += hl.astype(np.float64)
hists_signal_high += hh.astype(np.float64)
done = all(dones)
whens = [x for _,x in sorted(zip(qms,whens))]
qms = sorted(qms)
for i, qm in enumerate(qms):
try:
when = whens[i].isoformat()
except Exception:
when = whens[i]
if Errors[i] is not None:
# Avoid printing a wrong injection date if the constant was not found.
if "not found" in str(Errors[i]):
print(f"ERROR! {qm}: {Errors[i]}")
else:
print(f"Offset for {qm} was injected on {when}, ERROR!: {Errors[i]}")
else:
print(f"Offset for {qm} was injected on {when}")
```
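%% Cell type:markdown id: tags:
The essential correction step in the cell above is a per-memory-cell offset subtraction, indexing the offset constant by each frame's `cellId`. A minimal sketch with toy shapes (all values below are made up for illustration):
%% Cell type:code id: tags:
``` python
import numpy as np

n_cells, ny, nx = 4, 8, 8
# Per-cell dark offset, shaped (cells, y, x) as after the moveaxis above.
offset = np.full((n_cells, ny, nx), 50.0, dtype=np.float32)
cell_id = np.array([0, 1, 2, 3, 0, 1])  # memory cell of each recorded frame
data = np.full((len(cell_id), ny, nx), 55.0, dtype=np.float32)

data -= offset[cell_id, ...]  # subtract the matching cell's offset per frame
print(data.mean())  # 5.0
```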
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.ticker import FormatStrFormatter, LinearLocator
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10,10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
from matplotlib.colors import LogNorm
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse in a detailed and expanded intensity region.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_signal_low, [low_edges, pulse_edges], "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_low, [low_edges, pulse_edges], "Signal (ADU)", "Pulse id")
do_3d_plot(hists_signal_high, [high_edges, pulse_edges], "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_high, [high_edges, pulse_edges], "Signal (ADU)", "Pulse id")
```
%% Cell type:code id: tags:
``` python
corrected = []
raw = []
mask = []
pulse_ids = []
train_ids = []
for channel, ff in first_files.items():
try:
raw_file, corr_file = ff
data_path = h5path.format(channel)
index_path = h5path_idx.format(channel)
try:
infile = h5py.File(raw_file, "r")
first_idx = int(np.array(infile[f"{index_path}/first"])[0])
raw_d = np.array(infile[f"{data_path}/data"])
# Use the first 128 images for plotting
if raw_d.shape[0] >= 128:
    plt_im = 128
else:
    plt_im = raw_d.shape[0]
last_idx = first_idx + plt_im
raw.append((channel,raw_d[first_idx:last_idx,0,...]))
finally:
infile.close()
infile = h5py.File(corr_file, "r")
try:
corrected.append((channel, np.array(infile[f"{data_path}/data"][first_idx:last_idx,...])))
mask.append((channel, np.array(infile[f"{data_path}/mask"][first_idx:last_idx,...])))
pulse_ids.append((channel, np.squeeze(infile[f"{data_path}/pulseId"][first_idx:last_idx,...])))
train_ids.append((channel, np.squeeze(infile[f"{data_path}/trainId"][first_idx:last_idx,...])))
finally:
infile.close()
except Exception as e:
print(e)
```
%% Cell type:code id: tags:
``` python
def combine_stack(d, sdim):
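# Assemble the per-module image stacks into one full-detector stack,
# placing each module's two tiles according to the geometry file (geo_file).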
combined = np.zeros((sdim, 1300, 1300), np.float32)
dy = 0
quad_pos = [
(0, 145),
(130, 140),
(125, 15),
(0, 15),
]
px = 0.236
py = 0.204
with h5py.File(geo_file, "r") as gf:
# TODO: refactor to -> for ch, f in d:
for i in range(len(d)):
ch = d[i][0]
mi = 3-(ch%4)
mp = gf["Q{}/M{}/Position".format(ch//4+1, mi%4+1)][()]
t1 = gf["Q{}/M{}/T01/Position".format(ch//4+1, ch%4+1)][()]
t2 = gf["Q{}/M{}/T02/Position".format(ch//4+1, ch%4+1)][()]
if ch//4 < 2:
t1, t2 = t2, t1
if ch // 4 == 0 or ch // 4 == 1:
td = d[i][1][:,::-1,:]
else:
td = d[i][1][:,:,::-1]
t1d = td[:,:,:256]
t2d = td[:,:,256:]
x0t1 = int((t1[0]+mp[0])/px)
y0t1 = int((t1[1]+mp[1])/py)
x0t2 = int((t2[0]+mp[0])/px)
y0t2 = int((t2[1]+mp[1])/py)
x0t1 += int(quad_pos[i//4][1]/px)
x0t2 += int(quad_pos[i//4][1]/px)
y0t1 += int(quad_pos[i//4][0]/py)+combined.shape[1]//16
y0t2 += int(quad_pos[i//4][0]/py)+combined.shape[1]//16
combined[:,y0t1:y0t1+128,x0t1:x0t1+256] = t1d
combined[:,y0t2:y0t2+128,x0t2:x0t2+256] = t2d
return combined
```
%% Cell type:code id: tags:
``` python
combined = combine_stack(corrected, last_idx-first_idx)
combined_raw = combine_stack(raw, last_idx-first_idx)
combined_mask = combine_stack(mask, last_idx-first_idx)
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
%% Cell type:code id: tags:
``` python
display(Markdown("The per pixel mean of the first {} images of the RAW data".format(plt_im)))
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined_raw[:,...],axis=0),
vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), -5),
vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 50), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single shot image from cell 2 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
dim = combined[2,...]
im = ax.imshow(dim, vmin=-0, vmax=max(1.5*np.median(dim[dim > 0]), 50), cmap="jet", interpolation="nearest")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
h = ax.hist(dim.flatten(), bins=100, range=(0, 100))
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
%% Cell type:code id: tags:
``` python
display(Markdown("The per pixel mean of the first {} images of the CORRECTED data".format(plt_im)))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined[:,...], axis=0), vmin=0,
vmax=max(1.5*np.median(combined[combined > 0]), 10), cmap="jet", interpolation="nearest")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Max CORRECTED Preview ###
The per pixel maximum of the first 128 images of the CORRECTED data
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.max(combined[:,...], axis=0), vmin=0,
vmax=max(100*np.median(combined[combined > 0]), 20), cmap="jet", interpolation="nearest")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
combined[combined <= 0] = 0
h = ax.hist(combined.flatten(), bins=100, range=(-5, 100), log=True)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
import tabulate
from cal_tools.enums import BadPixels
from IPython.display import HTML, Latex, Markdown, display
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
```
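%% Cell type:markdown id: tags:
Individual flags can be tested with bitwise AND on the stored 32-bit mask entries; a short sketch (the combined value below is hypothetical):
%% Cell type:code id: tags:
``` python
import numpy as np

from cal_tools.enums import BadPixels

# Hypothetical mask entry with two flags set, as it could appear in "mask".
entry = np.uint32(BadPixels.NOISY_ADC.value | BadPixels.OFFSET_OUT_OF_THRESHOLD.value)

for flag in BadPixels:
    if entry & np.uint32(flag.value):
        print(f"{flag.name} is set")
```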
%% Cell type:markdown id: tags:
### Full Train Bad Pixels ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.log2(np.max(combined_mask[:,...], axis=0)), vmin=0,
vmax=32, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Full Train Bad Pixels - Only Dark Char. Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.max((combined_mask.astype(np.uint32)[:,...] & BadPixels.NOISY_ADC.value) != 0, axis=0), vmin=0,
vmax=1, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
......
%% Cell type:markdown id: tags:
# Jungfrau Dark Image Characterization #
Version: 0.1, Author: M. Ramilli, S. Hauf
Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps
%% Cell type:code id: tags:
``` python
cluster_profile = 'noDB' # the ipcluster profile name
in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/' # folder under which runs are located, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/jftest_dark/' # path to place reports at, required
run_high = 141 # run number for G0 dark run, required
run_med = 142 # run number for G1 dark run, required
run_low = 143 # run number for G2 dark run, required
karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which correspond to different JF modules
karabo_id = "SPB_IRDA_JF4M" # karabo_id (detector identifier) prefix of Jungfrau detector to process.
karabo_id_control = "" # if control is on a different ID; set to an empty string if it is the same as karabo_id
receiver_id = 'JNGFR{:02}' # inset for receiver devices
receiver_control_id = "CONTROL" # inset for control devices
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # template to use for file name, double escape sequence number
h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data' # path in H5 file under which images are located
h5path_run = '/RUN/{}/DET/{}' # path to run data
h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
karabo_da_control = "JNGFRCTRL00" # file inset for control data
use_dir_creation_date = True # use dir creation date
cal_db_interface = 'tcp://max-exfl016:8016' # calibrate db interface to connect to
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
integration_time = 1000 # integration time in us, will be overwritten by value in file
bias_voltage = 90 # sensor bias voltage in V, will be overwritten by value in file
badpixel_threshold_sigma = 5. # bad pixels are values further than this many standard deviations from the median
offset_abs_threshold_low = [1000, 10000, 10000] # absolute bad pixel threshold in terms of offset, lower values
offset_abs_threshold_high = [8000, 15000, 15000] # absolute bad pixel threshold in terms of offset, upper values
chunkSize = 10 # iteration chunk size, needs to match or be less than number of images in a sequence file
imageRange = [0, 500] # image range in which to evaluate
memoryCells = 16 # number of memory cells
db_module = ['Jungfrau_M275', "Jungfrau_M035", 'Jungfrau_M273','Jungfrau_M203','Jungfrau_M221','Jungfrau_M267'] # ID of module in calibration database
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
time_limits = 0.025 # when retrieving calibration constants later on, the integration time is allowed to vary by this amount (in us)
operation_mode = '' # Detector operation mode, optional
```
%% Cell type:code id: tags:
``` python
import glob
import os
import warnings
warnings.filterwarnings('ignore')
import h5py
import matplotlib
from h5py import File as h5file
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from cal_tools.ana_tools import save_dict_to_hdf5
from cal_tools.enums import BadPixels
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants, Detectors, Versions
from XFELDetAna.util import env
env.iprofile = cluster_profile
from XFELDetAna.detectors.jungfrau import reader as jfreader
from XFELDetAna.detectors.jungfrau import readerPSI as jfreaderPSI
from XFELDetAna.detectors.jungfrau.jf_chunk_reader import JFChunkReader
from XFELDetAna.detectors.jungfrau.util import (
count_n_files,
rollout_data,
sanitize_data_cellid,
)
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.histogram import histPlot
```
%% Cell type:code id: tags:
``` python
path_inset = karabo_da[0] # karabo_da is a concurrency parameter
receiver_id = receiver_id.format(int(path_inset[-2:]))
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_high, run_med, run_low)
report = get_report(out_folder)
os.makedirs(out_folder, exist_ok=True)
# TODO
# this trick is needed until proper mapping is introduced
if len(db_module)>1:
# TODO: SPB JF Hack till using all modules.
if karabo_id == "SPB_IRDA_JNGFR" and int(path_inset[-2:]) > 5:
db_module = db_module[int(path_inset[-2:])-3]
else:
db_module = db_module[int(path_inset[-2:])-1]
else:
db_module = db_module[0]
# Constants relevant for the analysis
run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
sensorSize = [1024, 512]
blockSize = [1024, 512]
xRange = [0, 0+sensorSize[0]]
yRange = [0, 0+sensorSize[1]]
gains = [0, 1, 2]
h5path = h5path.format(karabo_id, receiver_id)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print("Using {} as creation time".format(creation_time))
cal_db_interface = get_random_db_interface(cal_db_interface)
print('Calibration database interface: {}'.format(cal_db_interface))
offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]
if karabo_id_control == "":
karabo_id_control = karabo_id
print('Path inset ', path_inset)
print('Receiver Id ', receiver_id)
```
%% Cell type:code id: tags:
``` python
def check_memoryCells(file_name, path):
with h5file(file_name, 'r') as f:
t_stamp = np.array(f[path + '/storageCells/timestamp'])
st_cells = np.array(f[path + '/storageCells/value'])
sc_start = np.array(f[path + '/storageCellStart/value'])
valid_train = t_stamp > 0
n_scs = st_cells[valid_train][0] + 1
sc_s = sc_start[valid_train][0]
return n_scs, sc_s
```
%% Cell type:code id: tags:
``` python
chunkSize = 100
filep_size = 1000
noiseCal = None
noise_map = None
offset_map = None
memoryCells = None
for i, r_n in enumerate(run_nums):
gain = i
print(f"Gain stage {gain}, run {r_n}")
valid_data = []
valid_cellids = []
if r_n is not None:
n_tr = 0
n_empty_trains = 0
n_empty_sc = 0
ped_dir = "{}/r{:04d}/".format(in_folder, r_n)
fp_name = path_template.format(r_n, karabo_da_control)
fp_path = '{}/{}'.format(ped_dir, fp_name)
n_files = len(glob.glob("{}/*{}*.h5".format(ped_dir, path_inset)))
files_pattern = "{}/*{}*.h5".format(ped_dir, path_inset)
n_files = len(glob.glob(files_pattern))
if n_files == 0:
raise Exception(f"No files found matching {files_pattern!r}")
myRange = range(0, n_files)
control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)
this_run_mcells, sc_start = check_memoryCells(fp_path.format(myRange[0]), control_path)
if noise_map is None:
if not manual_slow_data:
with h5py.File(fp_path.format(0), 'r') as f:
run_path = h5path_run.format(karabo_id_control, receiver_control_id)
integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])
print("Integration time is {} us".format(integration_time))
print("Bias voltage is {} V".format(bias_voltage))
if this_run_mcells == 1:
memoryCells = 1
print('Dark runs in single cell mode\n storage cell start: {:02d}'.format(sc_start))
else:
memoryCells = 16
print('Dark runs in burst mode\n storage cell start: {:02d}'.format(sc_start))
noise_map = np.zeros(sensorSize+[memoryCells, 3])
offset_map = np.zeros(sensorSize+[memoryCells, 3])
fp_name = path_template.format(r_n, path_inset)
fp_path = '{}/{}'.format(ped_dir, fp_name)
path = h5path
print("Reading data from {}".format(fp_path))
print("Run is: {}".format(r_n))
print("HDF5 path: {}".format(h5path))
imageRange = [0, filep_size*len(myRange)]
reader = JFChunkReader(filename = fp_path, readFun = jfreader.readData, size = filep_size, chunkSize = chunkSize,
path = h5path, image_range=imageRange, pixels_x = sensorSize[0], pixels_y = sensorSize[1],
x_range = xRange, y_range = yRange, imagesPerChunk=chunkSize, filesRange = myRange,
memoryCells=this_run_mcells, blockSize=blockSize)
for data in reader.readChunks():
images = np.array(data[0], dtype=np.float64)
gainmaps = np.array(data[1], dtype=np.uint16)
trainId = np.array(data[2])
fr_num = np.array(data[3])
acelltable = np.array(data[4])
n_tr += acelltable.shape[-1]
this_tr = acelltable.shape[-1]
idxs = np.nonzero(trainId)[0]
images = images[..., idxs]
gainmaps = gainmaps[..., idxs]
fr_num = fr_num[..., idxs]
acelltable = acelltable[..., idxs]
if memoryCells == 1:
acelltable -= sc_start
n_empty_trains += this_tr - acelltable.shape[-1]
n_empty_sc += len(acelltable[acelltable > 15])
if i > 0 and memoryCells == 16: ## throwing away all the SC entries except the first for lower gains
acelltable[1:] = 255
# makes 4-dim vecs into 3-dim
# makes 2-dim into 1-dim
# leaves 1-dim and 3-dim vecs
images, gainmaps, acelltable = rollout_data([images, gainmaps, acelltable])
images, gainmaps, acelltable = sanitize_data_cellid([images, gainmaps], acelltable) # removes entries with cellID 255
valid_data.append(images)
valid_cellids.append(acelltable)
valid_data = np.concatenate(valid_data, axis=2)
valid_cellids = np.concatenate(valid_cellids, axis=0)
for cell in range(memoryCells):
thiscell = valid_data[...,valid_cellids == cell]
noise_map[...,cell,gain] = np.std(thiscell, axis=2)
offset_map[...,cell,gain] = np.mean(thiscell, axis=2)
print('G{:01d} dark calibration'.format(i))
print('Missed {:d} out of {:d} trains'.format(n_empty_trains, n_tr))
print('Lost {:d} images out of {:d}'.format(n_empty_sc, this_run_mcells * (n_tr - n_empty_trains)))
else:
print('missing G{:01d}'.format(i))
```
%% Cell type:markdown id: tags:
## Offset and Noise Maps ##
Below, offset and noise maps for the high gain ($g_0$) stage are shown, alongside the distributions of these values. One expects block-like structures mapping to the ASICs of the detector.
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
from XFELDetAna.core.util import remove_nans
%matplotlib inline
#%matplotlib notebook
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.histogram import histPlot
g_name = ['G0', 'G1', 'G2']
g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
n_range = [(0., 50.), (0., 50.), (0., 50.)]
unit = '[ADCu]'
```
%% Cell type:code id: tags:
``` python
for g_idx in gains:
for cell in range(0, memoryCells):
f_o0 = heatmapPlot(np.swapaxes(offset_map[..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label=unit,
aspect=1.,
vmin=g_range[g_idx][0],
vmax=g_range[g_idx][1],
title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d}')
fo0, ax_o0 = plt.subplots()
res_o0 = histPlot(ax_o0, offset_map[..., cell, g_idx],
bins=800,
range=g_range[g_idx],
facecolor='b',
histotype='stepfilled')
ax_o0.tick_params(axis='both',which='major',labelsize=15)
ax_o0.set_title(f'Module pedestal distribution - Cell {cell:02d}', fontsize=15)
ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
ax_o0.set_yscale('log')
f_n0 = heatmapPlot(np.swapaxes(noise_map[..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label= unit,
aspect=1.,
vmin=n_range[g_idx][0],
vmax=n_range[g_idx][1],
title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d}")
fn0, ax_n0 = plt.subplots()
res_n0 = histPlot(ax_n0, noise_map[..., cell, g_idx],
bins=100,
range=n_range[g_idx],
facecolor='b',
histotype='stepfilled')
ax_n0.tick_params(axis='both',which='major',labelsize=15)
ax_n0.set_title(f'Module noise distribution - Cell {cell:02d}', fontsize=15)
ax_n0.set_xlabel(f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
#ax_n0.set_yscale('log')
plt.show()
```
%% Cell type:markdown id: tags:
## Bad Pixel Map ##
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:
$$
v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
$$
or
$$
v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
$$
Values are encoded in a 32-bit mask, where the following non-zero entries are relevant for bad pixels deduced from dark images:
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
print("{:<30s} {:032b}".format(bp.name, bp.value))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
```
%% Cell type:code id: tags:
``` python
bad_pixels_map = np.zeros(noise_map.shape, np.uint32)
def eval_bpidx(d):
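    # Median and std are computed over the pixel plane per (memory cell, gain),
    # then broadcast back over rows/columns for the per-pixel comparison.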
mdn = np.nanmedian(d, axis=(0, 1))[None, None, :, :]
std = np.nanstd(d, axis=(0, 1))[None, None, :, :]
idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)
return idx
offset_abs_threshold = np.array(offset_abs_threshold)
bad_pixels_map[eval_bpidx(offset_map)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bad_pixels_map[~np.isfinite(offset_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
bad_pixels_map[eval_bpidx(noise_map)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bad_pixels_map[~np.isfinite(noise_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
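# The absolute offset thresholds are per gain stage; broadcast them over
# rows, columns and memory cells (the last axis of offset_map is gain).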
bad_pixels_map[(offset_map < offset_abs_threshold[0][None, None, None, :]) | (offset_map > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
for g_idx in gains:
for cell in range(memoryCells):
bad_pixels = bad_pixels_map[:, :, cell, g_idx]
fn_0 = heatmapPlot(np.swapaxes(bad_pixels, 0, 1),
y_label="Row",
x_label="Column",
lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
aspect=1.,
vmin=0, title=f'G{g_idx} Bad pixel map - Cell {cell:02d}')
```
%% Cell type:code id: tags:
``` python
# TODO: this cell in the notebook is not designed to run for more than one module
# Constants need to support one entry per module, as for the big detectors
constants = {'Offset': np.moveaxis(offset_map, 0, 1),
'Noise': np.moveaxis(noise_map, 0, 1),
'BadPixelsDark': np.moveaxis(bad_pixels_map, 0, 1)}
md = None
for key, const_data in constants.items():
const = getattr(Constants.jungfrau, key)()
const.data = const_data
# set the operating condition
condition = Conditions.Dark.jungfrau(memory_cells=memoryCells, bias_voltage=bias_voltage,
integration_time=integration_time)
for parm in condition.parameters:
if parm.name == "Integration Time":
parm.lower_deviation = time_limits
parm.upper_deviation = time_limits
# This is needed when the notebook is run by a method other than
# myMDC, which already sends the CalCat info.
# TODO: Set db_module to "" by default in the first cell
if not db_module:
db_module = get_pdu_from_db(karabo_id, karabo_da, const,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
if db_output:
md = send_to_db(db_module, karabo_id, const, condition,
file_loc=file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, const, condition,
const.data, file_loc, report,
creation_time, out_folder)
print(f"Calibration constant {key} is stored locally at {out_folder}.\n")
print("Constants parameter conditions are:\n")
print(f"• Bias voltage: {bias_voltage}\n• Memory cells: {memoryCells}\n"
f"• Integration time: {integration_time}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# LPD Offset, Noise and Dead Pixels Characterization #
Author: M. Karnevskiy, S. Hauf
This notebook re-characterizes dark images to derive offset, noise and bad-pixel maps. All three types of constants are evaluated per pixel and per memory cell.
The notebook will correctly handle veto settings, but note that if you veto cells you will not be able to use these offsets for runs with different veto settings: vetoed cells will have zero offset.
The evaluated calibration constants are stored locally and injected into the calibration database.
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/exp/FXE/202030/p900121/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/LPD/" # path to output to, required
sequence = 0 # sequence files to evaluate
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 120 # run number in which high gain data was recorded, required
run_med = 121 # run number in which medium gain data was recorded, required
run_low = 122 # run number in which low gain data was recorded, required
karabo_id = "FXE_DET_LPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to image indices
use_dir_creation_date = True # use the creation date of the directory for database time derivation
cal_db_interface = "tcp://max-exfl016:8015#8025" # the database interface to use
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
capacitor_setting = 5 # capacitor_setting for which data was taken
mem_cells = 512 # number of memory cells used
bias_voltage = 250 # detector bias voltage
thresholds_offset_sigma = 3. # bad pixel relative threshold in terms of n sigma offset
thresholds_offset_hard = [400, 1500] # bad pixel hard threshold
thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise
thresholds_noise_hard = [1, 35] # bad pixel hard threshold
skip_first_ntrains = 10 # Number of first trains to skip
instrument = "FXE" # instrument name
ntrains = 100 # number of trains to use
high_res_badpix_3d = False # plot bad-pixel summary in high resolution
test_for_normality = False # perform normality test
operation_mode = '' # Detector operation mode, optional
```
%% Cell type:code id: tags:
``` python
import copy
import os
import warnings
from collections import OrderedDict
from datetime import datetime
from functools import partial
warnings.filterwarnings('ignore')
import dateutil.parser
import h5py
import matplotlib
from ipyparallel import Client
from IPython.display import Latex, Markdown, display
matplotlib.use("agg")
import matplotlib.patches as patches
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from iCalibrationDB import Conditions, Constants, Detectors, Versions
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
from cal_tools.enums import BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_notebook_name,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
parse_runs,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
```
%% Cell type:code id: tags:
``` python
client = Client(profile=cluster_profile)
view = client[:]
view.use_dill()
gains = np.arange(3)
max_cells = mem_cells
cells = np.arange(max_cells)
gain_names = ['High', 'Medium', 'Low']
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ['LPD{:02d}'.format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
gain_runs = OrderedDict()
if capacitor_setting == 5:
gain_runs["high_5pf"] = run_high
gain_runs["med_5pf"] = run_med
gain_runs["low_5pf"] = run_low
elif capacitor_setting == 50:
gain_runs["high_50pf"] = run_high
gain_runs["med_50pf"] = run_med
gain_runs["low_50pf"] = run_low
capacitor_settings = [capacitor_setting]
capacitor_settings = ['{}pf'.format(c) for c in capacitor_settings]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print("Using {} as creation time".format(creation_time))
run, prop, seq = run_prop_seq_from_path(in_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
display(Markdown('## Evaluated parameters'))
print('CalDB Interface {}'.format(cal_db_interface))
print("Proposal: {}".format(prop))
print("Memory cells: {}/{}".format(mem_cells, max_cells))
print("Runs: {}, {}, {}".format(run_high, run_med, run_low))
print("Sequence: {}".format(sequence))
print("Using DB: {}".format(db_output))
print("Input: {}".format(in_folder))
print("Output: {}".format(out_folder))
print("Bias voltage: {}V".format(bias_voltage))
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
gmf = map_gain_stages(in_folder, gain_runs, path_template, karabo_da, [sequence])
gain_mapped_files, total_sequences, total_file_size = gmf
print(f"Will process a total of {total_sequences} files.")
```
%% Cell type:markdown id: tags:
## Data processing
%% Cell type:code id: tags:
``` python
# the actual characterization
def characterize_module(cells, bp_thresh, skip_first_ntrains, ntrains, test_for_normality,
h5path, h5path_idx, inp):
import copy
import h5py
import numpy as np
import scipy.stats
from cal_tools.enums import BadPixels
def splitOffGainLPD(d):
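        # LPD raw words pack the ADC value in bits 0-11 and the gain stage in
        # bits 12-13; the bit masks below split the two apart.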
msk = np.zeros(d.shape, np.uint16)
msk[...] = 0b0000111111111111
data = np.bitwise_and(d, msk)
msk[...] = 0b0011000000000000
gain = np.bitwise_and(d, msk)//4096
gain[gain > 2] = 2
return data, gain
filename, channel, gg, cap = inp
thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh
infile = h5py.File(filename, "r", driver="core")
infile = h5py.File(filename, "r")
h5path = h5path.format(channel)
h5path_idx = h5path_idx.format(channel)
count = infile[f"{h5path_idx}/count"][()]
first = infile[f"{h5path_idx}/first"][()]
valid = count != 0
count, first = count[valid], first[valid]
first_image = int(first[skip_first_ntrains] if first.shape[0] > skip_first_ntrains else 0)
last_image = int(first_image + np.sum(count[skip_first_ntrains:skip_first_ntrains+ntrains]))
im = np.array(infile[f"{h5path}/data"][first_image:last_image, ...])
cellid = np.squeeze(np.array(infile[f"{h5path}/cellId"][first_image:last_image, ...]))
infile.close()
im, g = splitOffGainLPD(im[:, 0, ...])
im = im.astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
offset = np.zeros((im.shape[0], im.shape[1], cells))
noise = np.zeros((im.shape[0], im.shape[1], cells))
normal_test = np.zeros((im.shape[0], im.shape[1], cells))
for cc in range(cells):
idx = cellid == cc
if np.any(idx):
offset[..., cc] = np.median(im[:, :, idx], axis=2)
noise[..., cc] = np.std(im[:, :, idx], axis=2)
if test_for_normality:
_, normal_test[..., cc] = scipy.stats.normaltest(
im[:, :, idx], axis=2)
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0, 1))
offset_std = np.nanstd(offset, axis=(0, 1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (
offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0, 1))
noise_std = np.nanstd(noise, axis=(0, 1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (
noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
idx = cellid == 12
return offset, noise, channel, gg, cap, bp, im[12, 12, idx], normal_test
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
data_g = OrderedDict()
ntest_g = OrderedDict()
gg = 0
old_cap = None
start = datetime.now()
inp = []
for gain, mapped_files in gain_mapped_files.items():
cap = gain.split("_")[1]
if cap != old_cap:
gg = 0
old_cap = cap
offset_g[cap] = OrderedDict()
noise_g[cap] = OrderedDict()
badpix_g[cap] = OrderedDict()
data_g[cap] = OrderedDict()
ntest_g[cap] = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = mapped_files[qm].get()
print("Process file: ", fname_in)
inp.append((fname_in, i, gg, cap))
gg+=1
p = partial(characterize_module, max_cells,
(thresholds_offset_hard, thresholds_offset_sigma,
thresholds_noise_hard, thresholds_noise_sigma),
skip_first_ntrains, ntrains, test_for_normality,
h5path, h5path_idx)
# Don't remove. Used for Debugging.
#results = list(map(p, inp))
results = view.map_sync(p, inp)
for ir, r in enumerate(results):
offset, noise, i, gg, cap, bp, data, normal = r
qm = module_index_to_qm(i)
if qm not in offset_g[cap]:
offset_g[cap][qm] = np.zeros(
(offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[cap][qm] = np.zeros_like(offset_g[cap][qm])
badpix_g[cap][qm] = np.zeros_like(offset_g[cap][qm])
data_g[cap][qm] = np.full((ntrains, 3), np.nan)
ntest_g[cap][qm] = np.zeros_like(offset_g[cap][qm])
offset_g[cap][qm][..., gg] = offset
noise_g[cap][qm][..., gg] = noise
badpix_g[cap][qm][..., gg] = bp
data_g[cap][qm][:data.shape[0], gg] = data
ntest_g[cap][qm][..., gg] = normal
hn, cn = np.histogram(data, bins=20)
print(f"{gain_names[gg]} gain, Capacitor {cap}, Module: {qm}. "
f"Number of processed trains per cell: {data.shape[0]}.")
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
report = get_report(out_folder)
```
%% Cell type:code id: tags:
``` python
# TODO: add db_module when received from myMDC
# Create the modules dict of karabo_das and PDUs
qm_dict = OrderedDict()
for i, k_da in zip(modules, karabo_da):
qm = module_index_to_qm(i)
qm_dict[qm] = {"karabo_da": k_da,
"db_module": ""}
```
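%% Cell type:markdown id: tags:
`module_index_to_qm` from `cal_tools.tools` is assumed here to follow the same `Q{i//4+1}M{i%4+1}` naming scheme that other notebooks in this repository spell out explicitly; a sketch of the mapping:
%% Cell type:code id: tags:
``` python
def module_index_to_qm_sketch(i: int) -> str:
    # Each quadrant holds four modules: indices 0-3 -> Q1, 4-7 -> Q2, ...
    return f"Q{i // 4 + 1}M{i % 4 + 1}"

assert module_index_to_qm_sketch(0) == "Q1M1"
assert module_index_to_qm_sketch(15) == "Q4M4"
```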
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise", "BadPixelsDark"]
old_const = {}
old_mdata = {}
dinstance = "LPD1M1"
detinst = getattr(Detectors, dinstance)
print('Retrieve pre-existing constants for comparison.')
for cap in capacitor_settings:
old_const[cap] = {}
old_mdata[cap] = {}
for qm in offset_g[cap].keys():
old_const[cap][qm] = {}
old_mdata[cap][qm] = {}
qm_db = qm_dict[qm]
karabo_da = qm_db["karabo_da"]
condition = Conditions.Dark.LPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
capacitor=cap)
for const in clist:
constant = getattr(Constants.LPD, const)()
if not qm_db["db_module"]:
# This is needed when the notebook is run by a method other than
# myMDC, which already sends the CalCat info.
qm_db["db_module"] = get_pdu_from_db(karabo_id, [karabo_da], constant,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
data, mdata = get_from_db(karabo_id, karabo_da,
constant,
condition, None,
cal_db_interface,
creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[cap][qm][const] = data
if mdata is None or data is None:
old_mdata[cap][qm][const] = {
"timestamp": "Not found",
"filepath": None,
"h5path": None
}
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
old_mdata[cap][qm][const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
with open(f"{out_folder}/module_metadata_{qm}.yml","w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_db["db_module"],
"old-constants": old_mdata[cap][qm]
}, fd)
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for cap in capacitor_settings:
res[cap] = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[cap][qm] = {'Offset': offset_g[cap][qm],
'Noise': noise_g[cap][qm],
'BadPixelsDark': badpix_g[cap][qm]
}
```
%% Cell type:code id: tags:
``` python
# Save constants in the calibration DB
md = None
for cap in capacitor_settings:
for qm in res[cap]:
karabo_da = qm_dict[qm]["karabo_da"]
db_module = qm_dict[qm]["db_module"]
# Do not store empty constants
# In case of 0 trains, data_g is initialized with NaNs and never refilled.
if np.count_nonzero(~np.isnan(data_g[cap][qm]))==0:
continue
for const in res[cap][qm]:
dconst = getattr(Constants.LPD, const)()
dconst.data = res[cap][qm][const]
# set the operating condition
condition = Conditions.Dark.LPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
capacitor=cap)
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition,
file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition,
dconst.data, file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} is stored locally.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
f"• capacitor: {cap}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
show_processed_modules(
dinstance=dinstance,
constants=None,
mnames=[module_index_to_qm(i) for i in modules],
mode="position"
)
```
%% Cell type:markdown id: tags:
## Raw pedestal distribution ##
Distribution of the pedestal (in ADU) over trains for pixel (12, 12), memory cell 12. The median of the distribution is shown in yellow and one standard deviation in red. The green line shows the average over all pixels for the given memory cell and gain stage.
%% Cell type:code id: tags:
``` python
fig, grid = plt.subplots(3, 1, sharex="col", sharey="row", figsize=(10, 7))
fig.subplots_adjust(wspace=0, hspace=0)
for cap in capacitor_settings:
for i in modules:
qm = module_index_to_qm(i)
if np.count_nonzero(~np.isnan(data_g[cap][qm])) == 0:
break
for gain in range(3):
data = data_g[cap][qm][:, gain]
offset = np.nanmedian(data)
noise = np.nanstd(data)
xrange = [np.nanmin(data_g[cap][qm]), np.nanmax(data_g[cap][qm])]
nbins = int(xrange[1] - xrange[0])
hn, cn = np.histogram(data, bins=nbins, range=xrange)
grid[gain].hist(data, range=xrange, bins=nbins)
grid[gain].plot([offset-noise, offset-noise], [0, np.nanmax(hn)],
linewidth=1.5, color='red',
label='1 $\sigma$ deviation')
grid[gain].plot([offset+noise, offset+noise],
[0, np.nanmax(hn)], linewidth=1.5, color='red')
grid[gain].plot([offset, offset], [0, 0],
linewidth=1.5, color='y', label='median')
grid[gain].plot([np.nanmedian(offset_g[cap][qm][:, :, 12, gain]),
np.nanmedian(offset_g[cap][qm][:, :, 12, gain])],
[0, np.nanmax(hn)], linewidth=1.5, color='green',
label='average over pixels')
grid[gain].set_xlim(xrange)
grid[gain].set_ylim(0, np.nanmax(hn)*1.1)
grid[gain].set_xlabel("Offset value [ADU]")
grid[gain].set_ylabel("# of occurrences")
if gain == 0:
leg = grid[gain].legend(
loc='upper center', ncol=3,
bbox_to_anchor=(0.1, 0.25, 0.7, 1.0))
grid[gain].text(820, np.nanmax(hn)*0.4,
"{} gain".format(gain_names[gain]), fontsize=20)
a = plt.axes([.125, .1, 0.775, .8], frame_on=False)
a.patch.set_alpha(0.05)
a.set_xlim(xrange)
plt.plot([offset, offset], [0, 1], linewidth=1.5, color='y')
plt.xticks([])
plt.yticks([])
ypos = 0.9
x1pos = (np.nanmedian(data_g[cap][qm][:, 0]) +
np.nanmedian(data_g[cap][qm][:, 2]))/2.
x2pos = (np.nanmedian(data_g[cap][qm][:, 2]) +
np.nanmedian(data_g[cap][qm][:, 1]))/2.-10
plt.annotate("", xy=(np.nanmedian(data_g[cap][qm][:, 0]), ypos), xycoords='data',
xytext=(np.nanmedian(data_g[cap][qm][:, 2]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_g[cap][qm][:, 0])-np.nanmedian(data_g[cap][qm][:, 2])),
xy=(x1pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.annotate("", xy=(np.nanmedian(data_g[cap][qm][:, 2]), ypos), xycoords='data',
xytext=(np.nanmedian(data_g[cap][qm][:, 1]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_g[cap][qm][:, 2])-np.nanmedian(data_g[cap][qm][:, 1])),
xy=(x2pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.show()
```
%% Cell type:markdown id: tags:
## Normality test ##
Distributions of raw pedestal values have been tested for normality: a normality test has been performed for each pixel and each memory cell. The plots below show a histogram of p-values and a 2D distribution for memory cell 12.
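For reference, such per-pixel, per-cell p-values can be computed with SciPy's D'Agostino-Pearson test, as in this minimal sketch (synthetic data; the notebook itself uses precomputed values in `ntest_g`):
``` python
import numpy as np
from scipy import stats

# Sketch: normality test along the train axis, yielding one p-value
# per pixel and memory cell (synthetic stand-in for raw pedestal data).
rng = np.random.default_rng(0)
raw = rng.normal(1000, 5, size=(200, 32, 64, 16))  # assumed (trains, rows, cols, cells)
_, p_values = stats.normaltest(raw, axis=0)        # p_values.shape == (32, 64, 16)
```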
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in capacitor_settings:
if not test_for_normality:
print('Normality test was not requested. Flag `test_for_normality` is False')
break
for i in modules:
qm = module_index_to_qm(i)
data = np.copy(ntest_g[cap][qm][:,:,:,:])
data[badpix_g[cap][qm][:,:,:,:]>0] = 1.01
hn,cn = np.histogram(data[:,:,:,0], bins=100)
d = [{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,0], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'High gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,1], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Medium gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,2], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Low gain',
},
]
fig = plt.figure(figsize=(15,15), tight_layout={'pad': 0.5, 'w_pad': 0.3})
for gain in range(3):
ax = fig.add_subplot(221+gain)
heatmapPlot(data[:,:,12,gain], add_panels=False, cmap='viridis', figsize=(10,10),
y_label='Rows', x_label='Columns',
lut_label='p-Value',
use_axis=ax,
title='p-Value for cell 12, {} gain'.format(gain_names[gain]) )
ax = fig.add_subplot(224)
_ = simplePlot(d, #aspect=1.6,
               x_label="p-Value",
               y_label="# of occurrences",
use_axis=ax,
y_log=False, legend='outside-top-ncol3-frame', legend_pad=0.05, legend_size='5%')
ax.ticklabel_format(style='sci', axis='y', scilimits=(4,6))
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow the identification of potential effects on all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:code id: tags:
``` python
cell = 12
for cap in capacitor_settings:
for gain in range(3):
display(
Markdown('### Cell-12 overview - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(18, 22) , tight_layout={'pad': 0.1, 'w_pad': 0.1})
for qm in res[cap]:
for iconst, const in enumerate(['Offset', 'Noise', 'BadPixelsDark']):
ax = fig.add_subplot(321+iconst)
data = res[cap][qm][const][:, :, 12, gain]
vmax = 1.5 * np.nanmedian(res[cap][qm][const][:, :, 12, gain])
title = const
label = '{} value [ADU]'.format(const)
title = '{} value'.format(const)
if const == 'BadPixelsDark':
vmax = 4
data[data == 0] = np.nan
title = 'Bad pixel code'
label = title
cb_labels = ['1 {}'.format(BadPixels.NOISE_OUT_OF_THRESHOLD.name),
'2 {}'.format(BadPixels.OFFSET_NOISE_EVAL_ERROR.name),
'3 {}'.format(BadPixels.OFFSET_OUT_OF_THRESHOLD.name),
'4 {}'.format('MIXED')]
heatmapPlot(data, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label='', vmax=vmax,
use_axis=ax, cb_ticklabels=cb_labels, cb_ticks = np.arange(4)+1,
title='{}'.format(title))
else:
heatmapPlot(data, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label=label, vmax=vmax,
use_axis=ax,
title='{}'.format(title))
for qm in res[cap]:
for iconst, const in enumerate(['Offset', 'Noise']):
data = res[cap][qm][const]
dataBP = np.copy(data)
dataBP[res[cap][qm]['BadPixelsDark'] > 0] = -1
x_ranges = [[0, 1500], [0, 40]]
hn, cn = np.histogram(
data[:, :, :, gain], bins=100, range=x_ranges[iconst])
hnBP, cnBP = np.histogram(dataBP[:, :, :, gain], bins=cn)
d = [{'x': cn[:-1],
'y': hn,
'drawstyle': 'steps-pre',
'label': 'All data',
},
{'x': cnBP[:-1],
'y': hnBP,
'drawstyle': 'steps-pre',
'label': 'Bad pixels masked',
},
]
ax = fig.add_subplot(325+iconst)
_ = simplePlot(d, figsize=(5, 7), aspect=1,
x_label="{} value [ADU]".format(const),
y_label="# of occurance",
title='', legend_pad=0.1, legend_size='10%',
use_axis=ax,
y_log=True, legend='outside-top-2col-frame')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of the bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are re-binned with a factor of 2.
This excludes single bad pixels present only in disconnected pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# Switch rebin to 1 for full resolution and
# no interpolation for badpixel values.
rebin = 2
for gain in range(3):
display(Markdown('### Bad pixel behaviour - {} gain ###'.format(gain_names[gain])))
for cap in capacitor_settings:
for mod, data in badpix_g[cap].items():
plot_badpix_3d(data[...,gain], cols, title='', rebin_fac=rebin)
ax = plt.gca()
leg = ax.get_legend()
leg.set(alpha=0.5)
plt.show()
```
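%% Cell type:markdown id: tags:
The factor-2 re-binning described above can be illustrated as follows (a sketch of the idea only; the actual binning happens inside `plot_badpix_3d`, and the any-bad aggregation rule is an assumption):
``` python
import numpy as np

# Sketch: flag a 2x2 block in x/y if any pixel inside it carries a bad-pixel code.
mask = np.zeros((256, 256, 32), dtype=np.uint32)  # synthetic (rows, cols, cells)
mask[100, 101, 5] = 1                             # one isolated bad pixel
rebinned = mask.reshape(128, 2, 128, 2, 32).max(axis=(1, 3))
print(rebinned[50, 50, 5])                        # the containing 2x2 block is flagged
```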
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary across tiles ##
Plots give an overview of calibration constants averaged across tiles. A bad pixel mask is applied. Constants are compared with pre-existing constants retrieved from the calibration database. Differences $\Delta$ between the old and new constants are shown.
%% Cell type:code id: tags:
``` python
time_summary = []
for cap, cap_data in old_mdata.items():
time_summary.append(f"The following pre-existing constants are used for comparison for capacitor setting **{cap}**:")
for qm, qm_data in cap_data.items():
time_summary.append(f"- Module {qm}")
for const, const_data in qm_data.items():
time_summary.append(f" - {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Summary across tiles - {} gain'.format(gain_names[gain])))
for const in res[cap][qm]:
data = np.copy(res[cap][qm][const][:, :, :, gain])
label = 'Fraction of bad pixels'
if const != 'BadPixelsDark':
data[badpix_g[cap][qm][:, :, :, gain] > 0] = np.nan
label = '{} value [ADU]'.format(const)
else:
data[data>0] = 1.0
data = data.reshape(
int(data.shape[0] / 32),
32,
int(data.shape[1] / 128),
128,
data.shape[2])
data = np.nanmean(data, axis=(1, 3)).swapaxes(
0, 2).reshape(512, 16)
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(121)
_ = heatmapPlot(data[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(16)+1,
x_ticks=np.arange(16)+0.5)
if old_const[cap][qm][const] is not None:
ax = fig.add_subplot(122)
dataold = np.copy(old_const[cap][qm][const][:, :, :, gain])
label = '$\Delta$ {}'.format(label)
if const != 'BadPixelsDark':
if old_const[cap][qm]['BadPixelsDark'] is not None:
dataold[old_const[cap][qm]['BadPixelsDark'][:, :, :, gain] > 0] = np.nan
else:
dataold[:] = np.nan
else:
dataold[dataold>0]=1.0
dataold = dataold.reshape(
int(dataold.shape[0] / 32),
32,
int(dataold.shape[1] / 128),
128,
dataold.shape[2])
dataold = np.nanmean(dataold, axis=(
1, 3)).swapaxes(0, 2).reshape(512, 16)
dataold = dataold - data
_ = heatmapPlot(dataold[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(16)+1,
x_ticks=np.arange(16)+0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Variation of offset and noise across Tiles and ASICs ##
The following plots show the standard deviation $\sigma$ of the calibration constants. The plot of the standard deviation across tiles shows the pixels of one tile ($128 \times 32$); the value of each pixel is the standard deviation across the 16 tiles. The standard deviation across ASICs is shown over all tiles: the plot shows the pixels of one ASIC ($16 \times 32$), where each value is the standard deviation across all ASICs of the module.
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Variation of offset and noise across ASICs - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(res[cap][qm][const][:, :, :, gain])
data[badpix_g[cap][qm][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataA = np.nanmean(data, axis=2) # average over cells
dataA = dataA.reshape(8, 32, 16, 16)
dataA = np.nanstd(dataA, axis=(0, 2)) # average across ASICs
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataA, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis'
)
plt.show()
```
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Variation of offset and noise across tiles - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(res[cap][qm][const][:, :, :, gain])
data[badpix_g[cap][qm][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataT = data.reshape(
int(data.shape[0] / 32),
32,
int(data.shape[1] / 128),
128,
data.shape[2])
dataT = np.nanstd(dataT, axis=(0, 2))
dataT = np.nanmean(dataT, axis=2)
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataT, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Aggregate values and per cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behavior, averaged across pixels.
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Mean over pixels - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(9,11))
for iconst, const in enumerate(res[cap][qm]):
ax = fig.add_subplot(311+iconst)
data = res[cap][qm][const][:,:,:510,gain]
if const == 'BadPixelsDark':
data[data>0] = 1.0
dataBP = np.copy(data)
dataBP[badpix_g[cap][qm][:,:,:510,gain]>0] = -10
data = np.nanmean(data, axis=(0,1))
dataBP = np.nanmean(dataBP, axis=(0,1))
d = [{'y': data,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'All data'
}
]
if const != 'BadPixelsDark':
d.append({'y': dataBP,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'good pixels only'
})
y_title = "{} value [ADU]".format(const)
title = "{} value, {} gain".format(const, gain_names[gain])
else:
y_title = "Fraction of Bad Pixels"
title = "Fraction of Bad Pixels, {} gain".format(gain_names[gain])
data_min = np.min([data, dataBP]) if const != 'BadPixelsDark' else np.min([data])
data_max = np.max([data[20:], dataBP[20:]])
data_dif = data_max - data_min
local_max = np.max([data[200:300], dataBP[200:300]])
frac = 0.35
new_max = (local_max - data_min*(1-frac))/frac
new_max = np.max([data_max, new_max])
_ = simplePlot(d, figsize=(10,10), aspect=2, xrange=(-12, 510),
x_label = 'Memory Cell ID',
y_label=y_title, use_axis=ax,
title=title,
title_position=[0.5, 1.15],
inset='xy-coord-right', inset_x_range=(0,20), inset_indicated=True,
inset_labeled=True, inset_coord=[0.2,0.5,0.6,0.95],
inset_lw = 1.0, y_range = [data_min-data_dif*0.05, new_max+data_dif*0.05],
y_log=False, legend='outside-top-ncol2-frame', legend_size='18%',
legend_pad=0.00)
plt.tight_layout(pad=1.08, h_pad=0.35)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
table = []
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR]
for cap in res:
for qm in res[cap]:
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(res[cap][qm]['BadPixelsDark'][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[cap][qm][:,:,:,gain].astype(np.uint32) & bit.value))
if old_const[cap][qm]['BadPixelsDark'] is not None:
dataold = np.copy(old_const[cap][qm]['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[cap][qm]['BadPixelsDark'][:, :, :, gain].astype(np.uint32) & bit.value))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma}', f'{thresholds_offset_sigma}',
f'{thresholds_offset_hard}/{thresholds_noise_hard}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, gain {gain_names[gain]}', l_threshold[i], l_data[i]]
if old_const[cap][qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels ###
A pixel can be bad for several reasons; therefore, the sum over all bad pixel types can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
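%% Cell type:markdown id: tags:
The counting convention behind the table can be illustrated with a small sketch (the bit values below are assumptions for illustration; the real codes come from the `BadPixels` enum):
``` python
import numpy as np

# Sketch: a pixel carrying two flags counts once as "bad" but contributes to
# both per-type counts, so the per-type sum can exceed the total.
NOISE_OOT, OFFSET_OOT = np.uint32(1 << 0), np.uint32(1 << 1)  # assumed bit values
badpix = np.array([NOISE_OOT, NOISE_OOT | OFFSET_OOT, 0], dtype=np.uint32)
total = np.count_nonzero(badpix)                                            # 2
per_type = [np.count_nonzero(badpix & b) for b in (NOISE_OOT, OFFSET_OOT)]  # [2, 1]
```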
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
for cap in res:
for qm in res[cap]:
data = np.copy(res[cap][qm][const])
data[res[cap][qm]['BadPixelsDark']>0] = np.nan
if old_const[cap][qm][const] is not None and old_const[cap][qm]['BadPixelsDark'] is not None :
dataold = np.copy(old_const[cap][qm][const])
dataold[old_const[cap][qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
for gain in range(3):
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[cap][qm][const] is not None and old_const[cap][qm]['BadPixelsDark'] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # cluster profile to use
in_folder = "/gpfs/exfel/exp/FXE/201931/p900088/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/karnem/test_1/lpd_correct_006" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 270 # runs to process, required
karabo_id = "FXE_DET_LPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregator names; default ['-1'] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the index data
use_dir_creation_date = True # use the creation date of the directory for database time derivation
cal_db_interface = "tcp://max-exfl016:8015#8020" # the database interface to use
cal_db_timeout = 30000 # timeout for calibration db requests in milliseconds
calfile = "/gpfs/exfel/data/scratch/xcal/lpd_store_0519.h5" # path to constants extracted from the db into a file
mem_cells = 512 # memory cells in data
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
no_flat_fields = False # do not do flat field correction
max_pulses = 512 # maximum number of pulses per train
no_non_linear_corrections = False # do not apply non-linear corrections
max_cells_db = 512 # maximum cells for data from the database
rawversion = 2 # raw format version
capacitor = '5pF' # capacitor setting: 5pF or 50pF
photon_energy = 9.2 # the photon energy in keV
nodb = False # set to true if db input is to be avoided
bias_voltage = 250 # detector bias voltage
geometry_file = "/gpfs/exfel/d/cal/exchange/lpdMF_00.h5" # the geometry file to use, MAR 2018
beam_center_offset = [1.5, 1] # offset from the beam center, MAR 2018
sequences_per_node = 1 # sequence files to process per node
dont_mark_non_lin_region = False # do not mark non-linear regions in BP map
linear_between_high_gain = [-5000, 2500] # region in which high gain is considered linear, in ADU
linear_between_med_gain = [300, 3000] # region in which medium gain is considered linear, in ADU
linear_between_low_gain = [300, 3000] # region in which low gain is considered linear, in ADU
nlc_version = 2 # version of NLC to use
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import sys
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')
max_cells = mem_cells
if sequences[0] == -1:
sequences = None
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ['LPD{:02d}'.format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules: ",
', '.join([f"Q{x // 4 + 1}M{x % 4 + 1}" for x in modules]))
do_rel_gain = not no_relative_gain
do_ff = not no_flat_fields
index_v = rawversion
#do_ff = False
#relgain_store = "/gpfs/exfel/d/proc/FXE/201830/p900020/calibration/lpd_ci_store_{}_16_5pf.h5"
print("Applying FF corrections: {}".format(do_ff))
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import h5py
import matplotlib
import numpy as np
matplotlib.use("agg")
from collections import OrderedDict
from datetime import datetime
import matplotlib.pyplot as plt
from cal_tools.enums import BadPixels
from cal_tools.lpdlib import LpdCorrections
from cal_tools.plotting import create_constant_overview, plot_badpix_3d, show_overview
from cal_tools.tools import (
gain_map_files,
get_constant_from_db,
get_dir_creation_date,
get_notebook_name,
map_modules_from_folder,
parse_runs,
run_prop_seq_from_path,
)
from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions
from ipyparallel import Client
print("Connecting to profile {}".format(cluster_profile))
view = Client(profile=cluster_profile)[:]
view.use_dill()
gains = np.arange(3)
cells = np.arange(max_cells)
CHUNK_SIZE = 512
MAX_PAR = 32
if not os.path.exists(out_folder):
os.makedirs(out_folder)
elif not overwrite:
raise AttributeError("Output path exists! Exiting")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
else:
creation_time = datetime.now()
print("Using {} as creation time".format(creation_time.isoformat()))
_, proposal, seq = run_prop_seq_from_path(in_folder)
instrument = karabo_id.split("_")[0]
mark_non_lin_region = not dont_mark_non_lin_region
linear_between = [linear_between_high_gain, linear_between_med_gain, linear_between_low_gain]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
MAX_PAR = min(MAX_PAR, total_sequences)
```
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
import tabulate
from IPython.display import HTML, Latex, Markdown, display
print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
table = []
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
i = 0
while not files.empty():
f = files.get()
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
```
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses,
dbparms, fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between,
nlc_version, h5path, h5path_idx, karabo_id, inp):
import copy
import os
import re
import socket
from datetime import datetime
import h5py
import numpy as np
from cal_tools.enums import BadPixels
from cal_tools.lpdlib import LpdCorrections
hists_signal_low = None
hists_signal_high = None
hists_gain_vs_signal = None
low_edges = None
high_edges = None
signal_edges = None
when = None
qm = None
err = None
try:
start = datetime.now()
success = True
reason = ""
filename, filename_out, channel, karabo_da, qm = inp
infile = h5py.File(filename, "r", driver="core")
outfile = h5py.File(filename_out, "w")
# LPD correction requires paths without the leading "/"
if h5path[0] == '/':
h5path = h5path[1:]
if h5path_idx[0] == '/':
h5path_idx = h5path_idx[1:]
try:
lpd_corr = LpdCorrections(infile, outfile, max_cells, channel, max_pulses,
bins_gain_vs_signal, bins_signal_low_range,
bins_signal_high_range, do_ff=do_ff, raw_fmt_version=index_v,
correct_non_linear=(not no_non_linear_corrections),
mark_non_lin_region=mark_non_lin_region, linear_between=linear_between,
nlc_version=nlc_version,
h5_data_path=h5path, h5_index_path=h5path_idx)
try:
lpd_corr.get_valid_image_idx()
except IOError:
return
if not nodb:
when = lpd_corr.initialize_from_db(dbparms, karabo_id, karabo_da, only_dark=(fileparms != ""))
print(when)
if fileparms != "":
lpd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
print("Initialized constants")
for irange in lpd_corr.get_iteration_range():
lpd_corr.correct_lpd(irange)
print("All interations finished")
hists, edges = lpd_corr.get_histograms()
hists_signal_low, hists_signal_high, hists_gain_vs_signal = hists
low_edges, high_edges, signal_edges = edges
outfile.close()
infile.close()
print("Closed files")
except Exception as e1:
err = e1
outfile.close()
infile.close()
except Exception as e:
print(e)
success = False
reason = "Error"
err = e
return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, low_edges,
high_edges, signal_edges, when, qm, err)
done = False
first_files = []
inp = []
left = total_sequences
bins_gain_vs_signal = (100, 4)
bins_signal_low_range = 100
bins_signal_high_range = 100
hists_signal_low = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_signal_high = np.zeros((bins_signal_high_range, max_pulses), np.float64)
hists_gain_vs_signal = np.zeros((bins_gain_vs_signal), np.float64)
low_edges, high_edges, signal_edges = None, None, None
dbparms = cal_db_interface, creation_time, max_cells_db, capacitor, bias_voltage, photon_energy, cal_db_timeout
fileparms = calfile
whens = {}
while not done:
dones = []
first = True
for i, k_da in zip(modules, karabo_da):
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = str(mapped_files[qm].get())
dones.append(mapped_files[qm].empty())
else:
print("Skipping {}".format(qm))
first_files.append((None, None))
continue
fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
if first:
first_files.append((fname_in, fout))
inp.append((fname_in, fout, i, k_da, qm))
first = False
if len(inp) >= min(MAX_PAR, left):
print("Running {} tasks parallel".format(len(inp)))
p = partial(correct_module, max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses, dbparms,
fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between, nlc_version,
h5path, h5path_idx, karabo_id)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
inp = []
left -= MAX_PAR
for rr in r:
if rr is not None:
hl, hh, hg, low_edges, high_edges, signal_edges, when, qm, err = rr
whens[qm] = {}
whens[qm]['when'] = when
whens[qm]['err'] = err
if hl is not None: # any one being None will also make the others None
hists_signal_low += hl.astype(np.float64)
hists_signal_high += hh.astype(np.float64)
hists_gain_vs_signal += hg.astype(np.float64)
done = all(dones)
```
%% Cell type:code id: tags:
``` python
print("Offset was injected on: ")
for k, v in whens.items():
if v['err'] is None:
print("{}: {}".format(k, v['when']))
else:
print("{}: {}: {}".format(k, v['when'], v['err']))
```
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.ticker import FormatStrFormatter, LinearLocator
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10,10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
```
%% Cell type:markdown id: tags:
## Signal vs. Analogue Gain ##
The following plot shows signal vs. gain for the first 1280 images.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
from matplotlib.colors import LogNorm
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse in a detailed and an expanded intensity region.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
## Data Preview ##
In the following, geometry information from the LPD geometry file is applied. Quadrants are positioned at their last known positions. No bad pixel masking has been performed.
%% Cell type:code id: tags:
``` python
# geometry information
dc = beam_center_offset
#d_quads = [(-14+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],15+dc[1]),(-280+dc[0],-276+dc[1])] # MAR 2018
d_quads = [(-19+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],19+dc[1]),(-285+dc[0],-271+dc[1])] # MAY 2019
import cal_tools.metrology as metro
out_files = "{}/CORR*LPD*S{:05d}*.h5".format(out_folder, sequences[0] if sequences else 0)
datapath = "{}/image/data".format(h5path)
print("Preview is from {}".format(out_files))
```
%% Cell type:code id: tags:
``` python
posarr = metro.positionFileList(out_files, datapath, geometry_file, d_quads, nImages = 10)
maskpath = "{}/image/mask".format(h5path)
maskedarr = metro.positionFileList(out_files, maskpath, geometry_file, d_quads, nImages = 10)
```
%% Cell type:code id: tags:
``` python
# convert the Cartesian coordinates of the detector to polar coordinates
def mod_cart_to_pol(d, dx, dy, filter_by_val=True):
    """ Convert Cartesian coordinates to polar coordinates
    """
cx, cy = d.shape
x = np.arange(cx)+dx
y = np.arange(cy)+dy
x = np.repeat(x[:,None], cy, axis=1)
y = np.repeat(y[None,:], cx, axis=0)
rho = np.sqrt(x**2 + y**2).flatten()
phi = np.arctan2(y, x).flatten()
flat = d.flatten()
# we also perform a bit of filtering here
if filter_by_val:
good = np.isfinite(flat) & (flat > 0) & (flat < 1e5)
return rho[good], phi[good], flat[good], good
return rho, phi, flat, None
```
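%% Cell type:markdown id: tags:
As a usage sketch (not part of the original notebook), the helper above can be exercised on a synthetic tile:
``` python
import numpy as np

# Usage sketch: convert a 64x64 tile whose corner sits at (dx, dy) relative to
# the beam centre; filtering drops non-finite and out-of-range values.
tile = np.random.uniform(1, 10, size=(64, 64))
rho, phi, vals, good = mod_cart_to_pol(tile, -32, -32, filter_by_val=True)
```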
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single shot image from cell 5 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = posarr[5,...]
im=ax.imshow((parr), vmin=0, vmax=max(10*np.median(parr[parr > 0]), 100))
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU")
```
%% Cell type:markdown id: tags:
### Pixel Mean Preview ###
The per-pixel mean value of the first 10 images
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = np.mean(posarr, axis=0)
im=ax.imshow((parr), vmin=0, vmax=max(10*np.median(parr[parr > 0]), 100))
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU")
```
%% Cell type:markdown id: tags:
### Radial Profile ###
The simple azimuthally integrated profile plotted here assumes the beam is centred in the hole; it is thus not always fully accurate.
%% Cell type:code id: tags:
``` python
# Here we create histograms of the data in a polar coordinate system.
# We use scipy's binned_statistic_2d function, giving it the polar
# coordinates of each pixel and weighting each coordinate with the
# pixel's value. We obtain a histogram for each module, according to
# its position defined in the coord_list.
from scipy.stats import binned_statistic_2d
hs = []
bins_nums = []
edges = []
goods = []
bins = 5000
dx, dy = -750, -750
rho, phi, weights, good = mod_cart_to_pol(np.mean(posarr, axis=0), dy, dx, False)
#h, phi_edges, rho_edges = np.histogram2d(phi, rho, bins=(1000,1000),
# range=((-np.pi, np.pi), (0, 1000)),
# weights=weights)
h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),
range=((-np.pi, np.pi), (0, 1000)),
statistic = "sum")
bins_nums.append(bns)
hs.append(h)
edges.append((phi_edges, rho_edges))
goods.append(good)
```
%% Cell type:code id: tags:
``` python
x = np.arange(bins)/bins*1000*500e-6
y = np.arange(bins)/bins*2.
ds = np.array(hs).sum(axis=0)
```
%% Cell type:code id: tags:
``` python
# With appropriate coordinates given, plotting a profile along the
# vertical axis should give us the positions of the diffraction peaks,
# here still as distances on the detector plane. With knowledge of the
# detector-to-sample distance, these could then be converted into
# reciprocal coordinates.
ds[ds == 0] = np.nan
profile = np.nanmedian(ds, axis=0)
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(111)
p = ax.plot(x, profile)
l =ax.set_ylabel("Median intensity (arb. units)")
l = ax.set_xlabel("Radial distance (arb. units)")
```
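%% Cell type:markdown id: tags:
For reference, the conversion to reciprocal coordinates mentioned above would follow the usual elastic-scattering relation, with $r$ the radial distance on the detector, $D$ the detector-to-sample distance and $\lambda$ the wavelength:
$q = \frac{4\pi}{\lambda}\sin\theta, \qquad 2\theta = \arctan\left(\frac{r}{D}\right)$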
%% Cell type:markdown id: tags:
## Maximum Gain Value Reached ##
The following plot shows the maximum gain value reached. It can be used as an indication of whether the detector went into saturation.
%% Cell type:code id: tags:
``` python
gainpath = "{}/gain".format(h5path)
posarr = metro.positionFileList(in_files, gainpath, geometry_file, d_quads, nImages = 100)
gainpath = "{}/image/gain".format(h5path)
posarr = metro.positionFileList(out_files, gainpath, geometry_file, d_quads, nImages = 100)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = np.max(posarr, axis=0)
im=ax.imshow((parr), vmin=0, vmax=3)
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU")
```
......
%% Cell type:markdown id: tags:
# ePIX Data Correction
Authors: Q. Tian, S. Hauf, M. Cascella, Version: 1.0
The following notebook provides offset correction of images acquired with the ePix100 detector.
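At its core, this amounts to subtracting a per-pixel dark offset map from every frame, as in the following conceptual sketch (synthetic data; not the `xcal` implementation used below):
``` python
import numpy as np

# Conceptual sketch: per-pixel dark-offset subtraction for a stack of frames.
rng = np.random.default_rng(0)
offset_map = rng.normal(1000, 10, size=(708, 768)).astype(np.float32)  # from dark runs
frames = offset_map + rng.normal(0, 5, size=(20, 708, 768)).astype(np.float32)
corrected = frames - offset_map[None, ...]
```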
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # ipcluster profile to use
in_folder = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw" # input folder, required
in_folder = "/gpfs/exfel/exp/MID/202121/p002929/raw" # input folder, required
out_folder = "" # output folder, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
run = 9988 # which run to read data from, required
run = 126 # which run to read data from, required
karabo_id = "MID_EXP_EPIX-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "ePix100_M15" # module id in the database
receiver_id = "RECEIVER" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data/image' # path in the HDF5 file to images
h5path_t = '/INSTRUMENT/{}/DET/{}:daqOutput/data/backTemp' # path to find temperature at
h5path_cntrl = '/CONTROL/{}/DET' # path to control data
use_dir_creation_date = True # date constants injected before directory creation time
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
cpuCores = 4 # Specifies the number of running cpu cores
chunk_size_idim = 1 # H5 chunking size of output data
overwrite = True # overwrite output folder
limit_images = 0 # process only first N images, 0 - process all
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
fix_temperature = 290. # fix temperature to this value
gain_photon_energy = 9.0 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
import h5py
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, display
from pathlib import Path
import XFELDetAna.xfelprofiler as xprof
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from XFELDetAna.plotting.util import prettyPlotting
from XFELDetAna.util import env
from cal_tools.tools import (
get_constant_from_db,
get_dir_creation_date,
)
from iCalibrationDB import (
Conditions,
Constants,
)
warnings.filterwarnings('ignore')
prettyPlotting = True
profiler = xprof.Profiler()
profiler.disable()
env.iprofile = cluster_profile
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# TODO: expose to first cell after fixing clustering.
pattern_classification = False # clustering disabled until fixed (see TODO above).
if absolute_gain:
    relative_gain = True
```
%% Cell type:code id: tags:
``` python
h5path = h5path.format(karabo_id, receiver_id)
h5path_t = h5path_t.format(karabo_id, receiver_id)
h5path_cntrl = h5path_cntrl.format(karabo_id)
plot_unit = 'ADU'
if relative_gain:
plot_unit = 'keV'
if photon_energy > 0:
plot_unit = '$\gamma$'
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100
in_folder = Path(in_folder)
ped_dir = in_folder / f"r{run:04d}"
fp_name = path_template.format(run, karabo_da)
print(f"Reading from: {ped_dir / fp_name}")
print(f"Run is: {run}")
print(f"HDF5 path: {h5path}")
print(f"Data is output to: {out_folder}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
if creation_time:
print(f"Using {creation_time.isoformat()} as creation time")
```
%% Cell type:code id: tags:
``` python
sensorSize = [x, y]
chunkSize = 100 # Number of images to read per chunk
# Sensor area will be analysed according to blocksize
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1 # ePIX has no memory cells
run_parallel = True
# Read slow data from the first available sequence file.
filename = ped_dir / fp_name.format(
sequences[0] if sequences[0] != -1 else 0)
with h5py.File(filename, 'r') as f:
integration_time = int(
f[f"{h5path_cntrl}/CONTROL/expTime/value"][0])
temperature = np.mean(f[h5path_t]) / 100.
temperature_k = temperature + 273.15
if fix_temperature != 0:
temperature_k = fix_temperature
print("Temperature is fixed!")
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time}")
print(
f"Mean temperature was {temperature:0.2f} °C "
f"/ {temperature_k:0.2f} K at beginning of run"
)
print(f"Operated in vacuum: {in_vacuum} ")
out_folder = Path(out_folder)
if out_folder.is_dir() and not overwrite:
raise AttributeError("Output path exists! Exiting")
out_folder.mkdir(parents=True, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Glob the right *.h5 fast data files.
seq_files = sorted(ped_dir.glob(f"*{karabo_da}*.h5"))
# If a set of sequences is requested,
# restrict the seq_files list accordingly.
if sequences != [-1]:
seq_files = [f for f in seq_files
if any(
f.match(f"*-S{s:05d}.h5") for s in sequences)]
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
md = display(Latex(tabulate.tabulate(
table,
tablefmt='latex',
headers=["#", "file"]
)))
```
%% Cell type:markdown id: tags:
As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:
``` python
temp_limits = 5.
cond_dict = {
"bias_voltage": bias_voltage,
"integration_time": integration_time,
"temperature": temperature_k,
"in_vacuum": in_vacuum,
}
dark_condition = Conditions.Dark.ePix100(**cond_dict)
# update conditions with illuminated conditions.
cond_dict.update({
"photon_energy": gain_photon_energy
})
illum_condition = Conditions.Illuminated.ePix100(**cond_dict)
const_cond = {
"Offset": dark_condition,
"Noise": dark_condition,
"RelativeGain": illum_condition,
}
const_data = dict()
for cname, condition in const_cond.items():
if cname == "RelativeGain" and not relative_gain:
continue
# TODO: Fix this logic.
for parm in condition.parameters:
if parm.name == "Sensor Temperature":
parm.lower_deviation = temp_limits
parm.upper_deviation = temp_limits
const_data[cname] = get_constant_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=getattr(Constants.ePix100, cname)(),
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
print_once=2,
timeout=cal_db_timeout
)
if relative_gain and const_data["RelativeGain"] is None:
print(
"WARNING: RelativeGain map is requested, but not found./n"
"No gain correction will be applied"
)
relative_gain = False
plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
# ************************Calculators******************** #
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]
hscale = 1
commonModeBlockSize = [x//2, y//2]
stats = True
```
%% Cell type:code id: tags:
``` python
offsetCorrection = xcal.OffsetCorrection(
sensorSize,
const_data["Offset"],
nCells=memoryCells,
cores=cpuCores,
gains=None,
blockSize=blockSize,
parallel=run_parallel
)
if relative_gain:
gainCorrection = xcal.RelativeGainCorrection(
sensorSize,
1./const_data["RelativeGain"][..., None],
nCells=memoryCells,
parallel=run_parallel,
cores=cpuCores,
blockSize=blockSize,
gains=None,
)
```
%% Cell type:code id: tags:
``` python
# *****************Histogram Calculators****************** #
histCalOffsetCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
cores=cpuCores,
blockSize=blockSize
)
```
%% Cell type:markdown id: tags:
Applying corrections
%% Cell type:code id: tags:
``` python
histCalOffsetCor.debug()
offsetCorrection.debug()
if relative_gain:
gainCorrection.debug()
```
%% Cell type:code id: tags:
``` python
# ************************Calculators******************** #
if common_mode:
commonModeBlockSize = [x//2, y//2]
stats = True
histCalCMCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
cores=cpuCores,
blockSize=blockSize,
)
cmCorrectionB = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='block',
nCells=memoryCells,
noiseMap=const_data['Noise'],
runParallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionR = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='row',
nCells=memoryCells,
noiseMap=const_data['Noise'],
runParallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionC = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='col',
nCells=memoryCells,
noiseMap=const_data['Noise'],
runParallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
```
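%% Cell type:markdown id: tags:
Conceptually, each common-mode pass estimates a baseline from "quiet" pixels and subtracts it, e.g. row-wise, as in this illustrative sketch (not the `XFELDetAna` implementation; the threshold mirrors `cm_noise_sigma`):
``` python
import numpy as np

# Illustrative sketch of a row-wise common-mode correction: take the median of
# pixels within cm_noise_sigma * noise of zero in each row and subtract it.
rng = np.random.default_rng(0)
frame = rng.normal(0, 3, size=(708, 768)).astype(np.float32)
noise = np.full_like(frame, 3.0)
quiet = np.abs(frame) < 5.0 * noise  # cm_noise_sigma = 5
row_cm = np.nanmedian(np.where(quiet, frame, np.nan), axis=1, keepdims=True)
frame -= row_cm
```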
%% Cell type:code id: tags:
``` python
if relative_gain:
gain_cnst = np.median(const_data["RelativeGain"])
hscale = gain_cnst
plot_unit = 'keV'
if photon_energy > 0:
plot_unit = '$\gamma$'
hscale /= photon_energy
gainCorrection = xcal.RelativeGainCorrection(
sensorSize,
gain_cnst/const_data["RelativeGain"][..., None],
nCells=memoryCells,
parallel=run_parallel,
cores=cpuCores,
blockSize=blockSize,
gains=None,
)
histCalRelGainCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
cores=cpuCores,
blockSize=blockSize
)
if absolute_gain:
histCalAbsGainCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
cores=cpuCores,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if pattern_classification:
patternClassifier = xcal.PatternClassifier(
[x, y],
const_data["Noise"],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=0,
nCells=memoryCells,
cores=cpuCores,
allowElongated=False,
blockSize=[x, y],
runParallel=run_parallel,
)
histCalSECor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
cores=cpuCores,
blockSize=blockSize,
)
histCalGainCorSingles = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
cores=cpuCores,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if common_mode:
cmCorrectionB.debug()
cmCorrectionR.debug()
cmCorrectionC.debug()
histCalCMCor.debug()
if pattern_classification:
    patternClassifier.debug()
    histCalSECor.debug()
```
%% Cell type:markdown id: tags:
Applying corrections
%% Cell type:code id: tags:
``` python
def copy_and_sanitize_non_cal_data(
    infile: h5py.File,
    outfile: h5py.File,
    h5base: str
):
""" Copy and sanitize data in `infile`
that is not touched by `correctEPIX`. """
if h5base.startswith("/"):
h5base = h5base[1:]
dont_copy = [h5base+"/pixels"]
def visitor(k, item):
if k not in dont_copy:
if isinstance(item, h5py.Group):
outfile.create_group(k)
elif isinstance(item, h5py.Dataset):
group = str(k).split("/")
group = "/".join(group[:-1])
infile.copy(k, outfile[group])
infile.visititems(visitor)
```
%% Cell type:code id: tags:
``` python
for f in seq_files:
data = None
out_file = out_folder / f.name.replace("RAW", "CORR")
with h5py.File(f, "r") as infile, h5py.File(out_file, "w") as ofile: # noqa
try:
copy_and_sanitize_non_cal_data(infile, ofile, h5path)
data = infile[h5path+"/pixels"][()]
data = np.compress(
np.any(data > 0, axis=(1, 2)), data, axis=0)
if limit_images > 0:
data = data[:limit_images, ...]
oshape = data.shape
data = np.moveaxis(data, 0, 2)
ddset = ofile.create_dataset(
h5path+"/pixels",
oshape,
chunks=(chunk_size_idim, oshape[1], oshape[2]),
dtype=np.float32)
# Offset correction.
data = offsetCorrection.correct(data.astype(np.float32))
histCalOffsetCor.fill(data)
# Common Mode correction.
if common_mode:
# Block CM
data = cmCorrectionB.correct(data)
# Row CM
data = cmCorrectionR.correct(data)
# COL CM
data = cmCorrectionC.correct(data)
histCalCMCor.fill(data)
# relative gain correction.
if relative_gain:
data = gainCorrection.correct(data.astype(np.float32))
histCalRelGainCor.fill(data)
ddset[...] = np.moveaxis(data, 2, 0)
"""The gain correction is currently applying
an absolute correction (not a relative correction
as the implied by the name);
it changes the scale (the unit of measurement)
of the data from ADU to either keV or n_of_photons.
But the pattern classification relies on comparing
data with the noise map, which is still in ADU.
The best solution is to do a relative gain
correction first and apply the global absolute
gain to the data at the end, after clustering.
"""
# TODO: Fix conflict between pattern classification
# and gain corr.
if pattern_classification:
ddsetc = ofile.create_dataset(
h5path+"/pixels_classified",
oshape,
chunks=(chunk_size_idim, oshape[1], oshape[2]),
dtype=np.float32, compression="gzip")
ddsetp = ofile.create_dataset(
h5path+"/patterns",
oshape,
chunks=(chunk_size_idim, oshape[1], oshape[2]),
dtype=np.int32, compression="gzip")
data_clu, patterns = patternClassifier.classify(data)
data_clu[data_clu < (split_evt_primary_threshold*const_data["Noise"])] = 0 # noqa
ddsetc[...] = np.moveaxis(data_clu, 2, 0)
ddsetp[...] = np.moveaxis(patterns, 2, 0)
data_clu[patterns != 100] = np.nan
histCalSECor.fill(data_clu)
# absolute gain correction
# changes data from ADU to keV (or n. of photons)
if absolute_gain:
data = data * gain_cnst
if photon_energy > 0:
data /= photon_energy
histCalAbsGainCor.fill(data)
if pattern_classification:
data_clu = data_clu * gain_cnst
if photon_energy > 0:
data_clu /= photon_energy
ddsetc[...] = np.moveaxis(data_clu, 2, 0)
histCalGainCorSingles.fill(data_clu)
except Exception as e:
    print(f"ERROR applying corrections for {f}: {e}")
```
%% Cell type:code id: tags:
``` python
ho, eo, co, so = histCalOffsetCor.get()
d = [{
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset corr.'
}]
if common_mode:
ho, eo, co, so = histCalCMCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'CM corr.'
})
if relative_gain:
ho, eo, co, so = histCalRelGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Relative gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalSECor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Isolated photons (singles)'
})
fig = xana.simplePlot(
    d, aspect=1, x_label='Energy (ADU)',
    y_label='Number of occurrences', figsize='2col',
    y_log=True, x_range=(-50, 500),
    legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:
``` python
if absolute_gain:
    d = []
ho, eo, co, so = histCalAbsGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Absolute gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalGainCorSingles.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Isolated photons (singles)'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy ({plot_unit})',
y_label='Number of occurrences', figsize='2col',
y_log=True,
x_range=np.array((-50, 500))*hscale,
legend='top-center-frame-2col',
)
plt.grid()
plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:
## Mean Image of last Sequence ##
%% Cell type:code id: tags:
``` python
fig = xana.heatmapPlot(
np.nanmedian(data, axis=2),
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
```
%% Cell type:markdown id: tags:
## Single Shot of last Sequence ##
%% Cell type:code id: tags:
``` python
fig = xana.heatmapPlot(
data[..., 0],
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
```
......
......@@ -10,6 +10,9 @@ disable = "C0330, C0326"
[tool.pylint.format]
max-line-length = "88"
[flake8]
max-line-length = 88
[tool.pytest.ini_options]
norecursedirs = [
"legacy",
......
......@@ -64,7 +64,7 @@ async def wait_jobs(joblist):
for job in joblist:
if str(job) in line:
found_jobs.add(job)
if len(found_jobs) == 0:
if not found_jobs:
logging.info('Jobs are finished')
break
await asyncio.sleep(10)
......
......@@ -26,10 +26,8 @@ class PreInstallCommand(build):
def run(self):
version = check_output(["git", "describe", "--tag"]).decode("utf8")
version = version.replace("\n", "")
file = open("src/xfel_calibrate/VERSION.py", "w")
file.write('__version__="{}"'.format(version))
file.close()
with open("src/xfel_calibrate/VERSION.py", "w") as file:
file.write('__version__="{}"'.format(version))
build.run(self)
......@@ -82,6 +80,7 @@ setup(
"astcheck==0.2.5",
"astsearch==0.2.0",
"dill==0.3.0",
"dynaconf==3.1.4",
"extra_data==1.4.1",
"extra_geom==1.1.1",
"gitpython==3.1.0",
......
import posixpath
import traceback
import zlib
from multiprocessing.pool import ThreadPool
......@@ -21,6 +22,7 @@ from cal_tools.agipdutils import (
melt_snowy_pixels,
)
from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
from cal_tools.h5_copy_except import h5_copy_except_paths
from cal_tools.tools import get_constant_from_db_and_time
......@@ -247,8 +249,12 @@ class AgipdCorrections:
self.h5_index_path = h5_index_path
self.rng_pulses = max_pulses
# avoid list(range(*[0]]))
self.pulses_lst = list(range(*max_pulses)) \
if not (len(max_pulses) == 1 and max_pulses[0] == 0) else max_pulses # noqa
self.pulses_lst = (
list(range(*max_pulses))
if max_pulses != [0]
else max_pulses
)
self.max_cells = max_cells
self.gain_mode = gain_mode
self.comp_threads = comp_threads
......@@ -868,11 +874,7 @@ class AgipdCorrections:
"""
# Calculate the pulse step from the chosen max_pulse range
if len(self.rng_pulses) == 3:
pulse_step = self.rng_pulses[2]
else:
pulse_step = 1
pulse_step = self.rng_pulses[2] if len(self.rng_pulses) == 3 else 1
# Validate selected pulses range:
# 1) Make sure the range max doesn't have non-valid idx.
if self.pulses_lst[-1] + pulse_step > int(allpulses[-1]):
......@@ -983,31 +985,13 @@ class AgipdCorrections:
# these are touched in the correct function, do not copy them here
dont_copy = ["data", "cellId", "trainId", "pulseId", "status",
"length"]
dont_copy = [agipd_base + "image/{}".format(do)
for do in dont_copy]
# don't copy these as we may need to adjust if we filter trains
dont_copy += [idx_base + "{}/first".format(do)
for do in ["image", ]]
dont_copy += [idx_base + "{}/count".format(do)
for do in ["image", ]]
dont_copy += [idx_base + "{}/last".format(do)
for do in ["image", ]]
dont_copy += [idx_base + "{}/status".format(do)
for do in ["image", ]]
# a visitor to copy everything else
def visitor(k, item):
if k not in dont_copy:
if isinstance(item, h5py.Group):
outfile.create_group(k)
elif isinstance(item, h5py.Dataset):
group = str(k).split("/")
group = "/".join(group[:-1])
infile.copy(k, outfile[group])
infile.visititems(visitor)
dont_copy = [posixpath.join(agipd_base, "image", ds)
for ds in dont_copy]
# don't copy index as we may need to adjust if we filter trains
dont_copy.append(posixpath.join(idx_base, "image"))
h5_copy_except_paths(infile, outfile, dont_copy)
# sanitize indices
for do in ["image", ]:
......@@ -1034,10 +1018,7 @@ class AgipdCorrections:
if diff:
if i < len(cntsv):
cntsv = np.insert(cntsv, i, 0)
if i == 0:
fidxv = np.insert(fidxv, i, 0)
else:
fidxv = np.insert(fidxv, i, fidxv[i])
fidxv = np.insert(fidxv, i, 0) if i == 0 else np.insert(fidxv, i, fidxv[i])
else:
# append if at the end of the array
cntsv = np.append(cntsv, 0)
......@@ -1215,7 +1196,7 @@ class AgipdCorrections:
# This will handle some historical data in a different format
# constant dimension injected first
if slopesPC.shape[0] == 10 or slopesPC.shape[0] == 11:
if slopesPC.shape[0] in [10, 11]:
slopesPC = np.moveaxis(slopesPC, 0, 3)
slopesPC = np.moveaxis(slopesPC, 0, 2)
......
......@@ -111,7 +111,7 @@ def get_shadowed_stripe(data, threshold, fraction):
for idx, i in enumerate(A[1:-1]):
if i - 1 not in A:
continue
if len(tmp_idx) == 0:
if not tmp_idx:
tmp_idx.append(i)
continue
if tmp_idx[-1] + 1 == i and (
......