Compare revisions

c4f3ae2e · e11b01f4 · 5d7d2fbd · 6af3e90b · a6ed8c85 · a720be57
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,7 +3,10 @@ stages:
  - test
  - automated_test

-.before_script: &before_script
+default:
+  image: python:3.11
+  tags:
+    - docker
  before_script:
    - eval $(ssh-agent -s)
    - echo "$SSH_PRIVATE_KEY_GITLAB" | tr -d '\r' | ssh-add -
@@ -16,7 +19,7 @@ stages:
    - ssh-keyscan -p 10022 git.xfel.eu > $CI_PROJECT_DIR/.ssh/known_hosts
    - ls  $CI_PROJECT_DIR/.ssh
    - echo $GIT_SSH_COMMAND
-    - python3 -m venv .venv --clear
+    - python3 -m venv .venv
    - source .venv/bin/activate
    - python3 -m pip install --upgrade pip setuptools wheel

@@ -24,7 +27,6 @@ checks:
  stage: check
  only: [merge_requests]
  allow_failure: true
-  <<: *before_script
  script:
    - export PATH=/home/gitlab-runner/.local/bin:$PATH
    #  We'd like to run the pre-commit hooks only on files that are being
@@ -41,7 +43,6 @@ checks:
 pytest:
  stage: test
  only: [merge_requests]
-  <<: *before_script
  script:
    - export LANG=C  # Hopefully detect anything relying on locale
    - python3 -m pip install ".[test]"
@@ -52,28 +53,32 @@ pytest:
  #    reports:
  #      cobertura: coverage.xml

+cython-editable-install-test:
+  stage: test
+  only: [merge_requests]
+  script:
+    - python3 -m pip install -e ".[test]"
+    - python3 -m pytest --color yes --verbose ./tests/test_cythonalgs.py
+
 automated_test:
  variables:
    OUTPUT: $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
    DETECTORS: all
    CALIBRATION: all
+    PYENV_VERSION: "3.11"
+    CAL_CAL_TOOLS_CALCAT: "{base-api-url='http://exflcalproxy:8080/api', use-oauth2=false}"
  stage: automated_test
  only: [merge_requests]
  when: manual
  allow_failure: false
-  <<: *before_script
+  tags:
+    - integration
  script:
    - export LANG=C  # Hopefully detect anything relying on locale
    - python3 -m pip install ".[test]"
+    - python3 -c "from cal_tools.restful_config import restful_config; print(restful_config.get('calcat')['base-api-url'])"
    - echo "Running automated test. This can take sometime to finish depending on the test data."
    - echo "Given variables are REFERENCE=$REFERENCE, OUTPUT=$OUTPUT, DETECTORS=$DETECTORS, CALIBRATION=$CALIBRATION"
    - python3 -m pytest ./tests/test_reference_runs --color yes --verbose --release-test --reference-folder /gpfs/exfel/d/cal_tst/reference_folder --out-folder /gpfs/exfel/data/scratch/xcaltst/test/$OUTPUT  --detectors $DETECTORS --calibration $CALIBRATION
  timeout: 24 hours
-
-cython-editable-install-test:
-  stage: test
-  only: [merge_requests]
-  <<: *before_script
-  script:
-    - python3 -m pip install -e ".[test]"
-    - python3 -m pytest --color yes --verbose ./tests/test_cythonalgs.py
+  resource_group: automated_test
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,10 +7,10 @@ repos:
    hooks:
    - id: nbqa-check-ast
  - repo: https://github.com/pycqa/isort
-    rev: 5.7.0
+    rev: 5.13.2
    hooks:
      - id: isort
-  - repo: https://gitlab.com/pycqa/flake8
+  - repo: https://github.com/pycqa/flake8
    rev: 3.9.0
    hooks:
    - id: flake8

--- a/README.rst
+++ b/README.rst
--- a/ansible/README.md
+++ b/ansible/README.md
+# Automated deployment
+
+Ansible playbook for automated deployment on the online cluster
+
+## Setup Ansible
+
+```bash
+$ git clone https://github.com/ansible/ansible.git
+$ cd ansible
+$ git checkout v2.8.1
+```
+
+## Activate the environment
+
+```bash
+$ source ansible/hacking/env-setup
+```
+
+Alternatively you can use the provided environment on `exflgateway`
+
+```bash
+$ source /opt/karabo/ansible/hacking/env-setup
+```
+
+## Python dependencies
+
+Ensure `yaml` and `jinja2` packages are installed in your Python environment. If not:
+
+```bash
+pip install pyYaml jinja2
+```
+
+## Install pycalibration
+
+```bash
+./install.yml GROUP_NAME
+```
+
--- a/ansible/ansible.cfg
+++ b/ansible/ansible.cfg
+[defaults]
+forks = 30
+inventory = ./hosts
+stdout_callback = debug
+
+[ssh_connection]
+pipelining = True
+ssh_args = -o ControlMaster=auto -o ControlPersist=1200
\ No newline at end of file
--- a/ansible/group_vars/HED
+++ b/ansible/group_vars/HED
+---
+ansible_user: "xcal"
+python_executable: "/gpfs/exfel/sw/calsoft/.pyenv/versions/3.11.9/bin/python3"
+install_dir: "/scratch/xcal"
+pycal_dir: "{{ install_dir }}/pycalibration"
+pycal_version: "3.15.0"
--- a/ansible/hosts
+++ b/ansible/hosts
+[HED]
+sa2-onc-0[1:6]
+sa2-onc-hed
\ No newline at end of file
--- a/ansible/install.yml
+++ b/ansible/install.yml
+#!play
+
+- hosts: all
+
+  tasks:
+  - name: Find all backup directory
+    find:
+      paths: "{{ install_dir }}"
+      patterns: "^pycalibration_backup_\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}$"
+      file_type: directory
+      use_regex: True
+    register: backup_dirs
+
+  - name: Display matching directories
+    debug:
+      msg: "Matching directories: {{ backup_dirs.files | map(attribute='path') | list }}"
+
+  - name: Sort backup directories
+    set_fact:
+      sorted_backup_dirs: "{{ backup_dirs.files | sort(attribute='mtime', reverse=true) | list }}"
+
+  - name: Delete all but the most recent backup
+    file:
+      path: "{{ item.path }}"
+      state: absent
+    loop: "{{ sorted_backup_dirs[1:] }}"
+    when: sorted_backup_dirs | length > 1
+    register: deleted_dirs
+
+  - name: Information about backup retention
+    debug:
+      msg: |
+        Kept directory: {{ sorted_backup_dirs[0].path if sorted_backup_dirs else 'None' }}
+        Deleted directories ({{ deleted_dirs.results | length }}):
+        {% for dir in deleted_dirs.results %}
+        - {{ dir.path }}
+        {% endfor %}
+
+  - name: Get current date
+    command: date +%Y-%m-%dT%H:%M:%S
+    register: current_date
+    changed_when: false
+
+  - name: Set backup directory name
+    set_fact:
+      backup_dir_name: "pycalibration_backup_{{ current_date.stdout }}"
+
+  - name: Create backup directory
+    file:
+      state: directory
+      path: "{{ install_dir }}/{{ backup_dir_name }}"
+      mode: 0755
+    register: backup_dir
+
+  - name: Check for previous installation
+    stat:
+      path: "{{ pycal_dir }}"
+    register: pycal_dir_stat
+
+  - name: Backup previous installation
+    command: "mv {{ pycal_dir }} {{ backup_dir.path }}"
+    when: pycal_dir_stat.stat.exists
+
+  - name: Clone pycalibration
+    shell: git clone ssh://git@git.xfel.eu:10022/calibration/pycalibration.git -b {{ pycal_version }} {{ pycal_dir }}
+
+  - name: create venv
+    shell: "{{ python_executable }} -m venv --prompt pycal-{{ pycal_version }} {{ pycal_dir }}/.venv"
+
+  - name: Install Pycalibration
+    shell: all_proxy="http://exflproxy01.desy.de:3128" {{ pycal_dir }}/.venv/bin/pip install -e {{ pycal_dir }}
--- a/ansible/play
+++ b/ansible/play
+#!/bin/bash
+if [[ $# -lt 2 ]]
+then
+ echo "USAGE: $1 GROUP_NAME"
+ exit 1
+fi
+PLAYBOOK="$1"
+shift
+exec ansible-playbook "$PLAYBOOK" -l "$@" 
+
--- a/bin/slurm_calibrate.sh
+++ b/bin/slurm_calibrate.sh
@@ -25,15 +25,11 @@ echo "job ID: ${SLURM_JOB_ID:-none}"

 export CAL_NOTEBOOK_NAME="$notebook"

-# set-up enviroment
-source /etc/profile.d/modules.sh
-module load anaconda/3
-
 # make sure we use agg backend
 export MPLBACKEND=AGG

-# Ensure Python uses UTF-8 for files by default
-export LANG=en_US.UTF-8
+# Use the default locale (Python will still use UTF-8 for text files)
+export LANG=C

 # start an ip cluster if requested
 if [ "${ipcluster_profile}" != "NO_CLUSTER" ]

--- a/bin/slurm_finalize.sh
+++ b/bin/slurm_finalize.sh
@@ -16,13 +16,13 @@ echo "job ID: ${SLURM_JOB_ID:-none}"

 # set-up enviroment
 source /etc/profile.d/modules.sh
-module load texlive/2019
+module load texlive/2022

 # make sure we use agg backend
 export MPLBACKEND=AGG

-# Ensure Python uses UTF-8 for files by default
-export LANG=en_US.UTF-8
+# Use the default locale (Python will still use UTF-8 for text files)
+export LANG=C

 if [ -n "$report_to" ]; then
    shopt -s failglob  # Fail fast if there are no notebooks found

--- a/docs/development/installation.md
+++ b/docs/development/installation.md
@@ -11,54 +11,75 @@ to do this.

 ## Installation using python virtual environment - recommended

-`pycalibration` uses Python 3.8. Currently, the default
-python installation on Maxwell is still Python 3.6.8, so Python 3.8
-needs to be loaded from a different location.
-
-One option is to use the Maxwell Spack installation, running `module
-load maxwell` will activate the test Spack instance from
-DESY, then you can use `module load
-python-3.8.6-gcc-10.2.0-622qtxd` to Python 3.8. Note that
-this Spack instance is currently a trial phase and may not be stable.
-
-Another option is to use `pyenv`, we provide a pyenv
-installation at `/gpfs/exfel/sw/calsoft/.pyenv` which we use
-to manage different versions of python. This can be activated with
-`source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate`
-
-A quick setup would be:
-
-1.  `source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate`
-2.  `git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration` -
-    clone the offline calibration package from EuXFEL GitLab
-3.  `pyenv shell 3.8.11` - load required version of python
-4.  `python3 -m venv .venv` - create the virtual environment
-5.  `source .venv/bin/activate` - activate the virtual environment
-6.  `python3 -m pip install --upgrade pip` - upgrade version of pip
-7.  `python3 -m pip install .` - install the pycalibration package (add
-    `-e` flag for editable development installation)
-
-Copy/paste script:
+`pycalibration` uses Python 3.11. Currently the default python installation on Maxwell
+is still Python 3.9, so Python 3.11 needs to be loaded from a different
+location.
+
+We use `pyenv` to manage different Python versions. A pyenv installation is provided at
+`/gpfs/exfel/sw/calsoft/.pyenv`.
+
+To install pycalibration, follow these steps:

 ```bash
+# Activate pyenv
 source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate
+
+# Clone the repository
 git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git
 cd pycalibration
-pyenv shell 3.8.11
+
+# Set up Python environment
+pyenv shell 3.11.9
 python3 -m venv .venv
 source .venv/bin/activate
+
+# Install pycalibration
 python3 -m pip install --upgrade pip
-python3 -m pip install .  # `-e` flag for editable install, e.g. `pip install -e .`
+# Use 'pip install -e .' for editable install
+python3 -m pip install .
 ```

+## Working with Jupyter Notebooks
+
+If you plan to work with Jupyter notebooks interactively, you have two main options:
+
+### Option 1: Install Jupyter Notebook locally
+
+if you prefer to run Jupyter notebooks on your local machine or on Maxwell, you can install the `notebook` package in your virtual environment:
+
+```bash
+python3 -m pip install notebook==6.2.0
+```
+
+After installation, you can start a Jupyter notebook server by running:
+
+```bash
+jupyter notebook
+```
+
+### Option 2: Use max-jhub (Recommended)
+
+Alternatively, we recommend using max-jhub, a JupyterHub instance available at DESY.
+This option provides a convenient web-based environment for running Jupyter notebooks without needing to set up everything locally.
+
+For detailed instructions on how to use max-jhub, please refer to these documentations:
+
+- [Max-jhub DESY Documentation](https://confluence.desy.de/display/MXW/JupyterHub+on+Maxwell)
+- [Max-jhub EuXFEL User Documentation](https://rtd.xfel.eu/docs/data-analysis-user-documentation/en/latest/jhub/#via-max-jhub-recommended)
+
+To use max-jhub effectively with pycalibration, make sure you've created an ipython kernel as
+described in the [Creating an ipython kernel for virtual environments](#creating-an-ipython-kernel-for-virtual-environments) section below.
+
 ## Creating an ipython kernel for virtual environments

 To create an ipython kernel with pycalibration available you should (if
 using a venv) activate the virtual environment first, and then run:

 ```bash
-python3 -m pip install ipykernel  # If not using a venv add `--user` flag
-python3 -m ipykernel install --user --name pycalibration --display-name "pycalibration"  # If not using a venv pick different name
+# If not using a venv add `--user` flag
+python3 -m pip install ipykernel
+# If not using a venv pick different name
+python3 -m ipykernel install --user --name pycalibration --display-name "pycalibration"
 ```

 This can be useful for Jupyter notebook tools as [max-jhub documentation](https://rtd.xfel.eu/docs/data-analysis-user-documentation/en/latest/jhub/)([max-jhub](https://max-jhub.desy.de/hub/login))
@@ -71,17 +92,12 @@ GitLab.

 To set up the keys:

-1.  Connect to Maxwell
-2.  Generate a new keypair with `ssh-keygen -o -a 100 -t ed25519`, you
-    can either leave this in the default location (`~/.ssh/id_ed25519`)
-    or place it into a separate directory to make management of keys
-    easier if you already have multiple ones. If you are using a
-    password for your keys please check this page to learn how to manage
-    them:
-    <https://docs.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#adding-your-ssh-key-to-the-ssh-agent>
-3.  Add the public key (`id_ed25519.pub`) to your account on GitLab:
+1. Connect to Maxwell
+2. Generate a new keypair with `ssh-keygen -o -a 100 -t ed25519`, you can either leave this in the default location (`~/.ssh/id_ed25519`) or place it into a separate directory to make management of keys easier if you already have multiple ones.
+   If you are using a password for your keys please check this page to learn how to manage them: <https://docs.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#adding-your-ssh-key-to-the-ssh-agent>
+3. Add the public key (`id_ed25519.pub`) to your account on GitLab:
    <https://git.xfel.eu/gitlab/profile/keys>
-4.  Add the following to your `~/.ssh/config` file
+4. Add the following to your `~/.ssh/config` file

 ```bash
 # Special flags for gitlab over SSH

--- a/docs/references/changelog.md
+++ b/docs/references/changelog.md
 # Release Notes

+## 3.15.0
+
+Moving to python 3.11: updating dependencies and update calibration notebooks and modules accordingly
+
+# 3.14.4
+
+[AGIPD][Correct] New feature for lit-pixel counter
+[AGIPD][Correct] Refactor write_file on AgipdCorrections to create indexes using the DataFile
+[AGIPD][PC] Fixing plotting in summary notebook
+[AGIPD][LPD] Add DataFile.create_legacy_source
+[LPD][Correct] Write corrected data as new source and link to old source name
+[Jungfrau][Dark] use CalibrationData to retrieve prior CCVs
+[Jungfrau][Correct] Test ROI for JF1M
+[Jungfrau][Correct] use new correct data source and link to old data source
+[Jungfrau] roi-definitions to update_config.py script
+[PNCCD][Correct] New corrected data source and link to legacy source
+[Epix100][Correct] New corrected data source and a link to old data source
+[Gotthard2]: Avoid getting wrong data sources or including control devices' sources.
+[xfel-calibrate]: expose mincpus argument as --slurm-mincpus
+[Fix] Pin the pillow version
+[Test] Configure and Include new test runs
+[Test] Swap order of dicts for DeepDiff
+[Webservice] Remove extra slash from reports-folder config
+[Webservice] Use MUNGE credential with update_conf requests
+[CalCatAPI] port markdown tables of found constants
+[CalCatAPI] Convert digits and boolean to float and the rest to string
+Make constant tables in both markdown & latex format
+
+## 3.14.3
+
+Update report creation to use texlive/2022
+fix: Update documentations after EL9 update
+
+## 3.14.2
+
+[AGIPD][Correct][CS] Fix badpixelsCS before we have multiple CCVss to have same BadPixels shapes.
+[AGIPD] Modernize use of extra_data to get AGIPD control data
+[AGIPD][Correct] use np.nanmin and np.nanmax to avoid nans when modules are missing
+[AGIPD][Correct] bug when plotting gain data of only 0 after it's info was discarded by DAQ
+[Shimadzu][Correct] fix for less expected images
+Remove unnecessary 'module load' command from jobs to be repeated
+Remove reference to anaconda/3 module
+flake8 repository has moved from gitlab to github
+
 ## 3.14.1

 [AGIPD][CORRECT][DARK] Break wrong assumption for availability for 1st module.

--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
 out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
 overwrite = False  # IGNORED, NEEDED FOR COMPATIBILITY.
 modules = [-1] # modules to correct, set to -1 for all, range allowed
 train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
 run = 225 # runs to process, required

 karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 index_source_template = 'INDEX/{}/DET/{}:xtdf/'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device

 slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook

 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_root = ""  # The calibration database root path to access constant files. e.g. accessing constants from the test database /gpfs/exfel/d/cal_tst/caldb_store.

 mem_cells = -1  # Number of memory cells used, set to 0 to automatically infer
 bias_voltage = -1  # bias voltage, set to 0 to use stored value in slow data.
 acq_rate = -1. # the detector acquisition rate, use 0 to try to auto-determine
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
 max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
 mem_cells_db = -1  # set to a value different than 0 to use this value for DB queries
 integration_time = -1 # integration time, negative values for auto-detection.

 # Correction parameters
 blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
 cm_dark_fraction = 0.66 # threshold for fraction of  empty pixels to consider module enough dark to perform CM correction
 cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
 cm_n_itr = 4 # number of iterations for common mode correction
 hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
 mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
 noisy_adc_threshold = 0.25 # threshold to mask complete adc
 ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
 photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
 rounding_threshold = 0.5 # the fraction to round to down, 0.5 for standard rounding rule
 cs_mg_adjust = 7e3  # Value to adjust medium gain when correcting with current source. This is used when `adjust_mg_baseline` is True.

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 # TODO: Remove this boolean parameter an replace rel_gain_mode with it.
 rel_gain = False
 rel_gain_mode = "off"  # Select relative gain correction. Choices [`PC`, `CS`, `off`]. (`PC`: Pulse Capacitor, `CS`: Current Source, `off`: Disable relative gain correction). Default: off.
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 zero_nans = False # set NaN values in corrected data to 0
 zero_orange = False # set to 0 very negative and very large values in corrected data
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
 corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
 force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
 force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
 mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
 common_mode = False # Common mode correction
 melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
 mask_zero_std = False # Mask pixels with zero standard deviation across train
 low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
 round_photons = False  # Round to absolute number of photons, only use with gain corrections

+# Additional processing
+count_lit_pixels = False # Count the number of pixels registering photons
+spi_hitfinding = False  # Find hits using lit-pixel counter
+
 # Optional auxiliary devices
 use_ppu_device = ''  # Device ID for a pulse picker device to only process picked trains, empty string to disable
 ppu_train_offset = 0  # When using the pulse picker, offset between the PPU's sequence start and actually picked train
 require_ppu_trigger = False  # Optional protection against running without PPU or without triggering trains.

 use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
 litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
 energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
 use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction

 use_xgm_device = ''  # DoocsXGM device ID to obtain actual photon energy, operating condition else.

 # Output parameters
 recast_image_data = ''  # Cast data to a different dtype before saving
 compress_fields = ['gain', 'mask']  # Datasets in image group to compress.

+# SPI hit-finder parameters
+spi_hf_modules = [3, 4, 8, 15]  # Use specified modules for SPI hitfinding
+spi_hf_mode = "adaptive"  # The method to compute threshold for hitscores in SPI hitfinding: `fixed` or `adaptive`
+spi_hf_snr = 4.0  # Siginal-to-noise ration for adaptive threshold in SPI hitfinding
+spi_hf_min_scores = 100  # The minimal size of events to compute adaptive threshold in SPI hitfinding
+spi_hf_fixed_threshold = 0  # The fixed threshold value
+spi_hf_hitrate_window_size = 200  # The window size for runnig average of hitrate in trains
+spi_hf_miss_fraction = 1  # The fraction of misses to select along with hits
+spi_hf_miss_fraction_base = "hit"  # The base to compute the number of misses to select: the number of hits (`hit`) or misses (`miss`)
+
 # Plotting parameters
 skip_plots = False # exit after writing corrected files and metadata
 cell_id_preview = 1 # cell Id used for preview in single-shot plots
 cmap = "viridis"  # matplolib.colormap for almost all heatmap. Other options ['plasma', 'inferno', 'magma', 'cividis', 'jet', ...]

 # Parallelization parameters
 chunk_size = 1000  # Size of chunk for image-wise correction
 n_cores_correct = 16 # Number of chunks to be processed in parallel
 n_cores_files = 4 # Number of files to be processed in parallel
 sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
 max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
 max_tasks_per_worker = 1  # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.


 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import math
 import multiprocessing
 import os
+import sys
 import warnings
 from datetime import timedelta
 from logging import warning
 from pathlib import Path

 import tabulate
 from dateutil import parser
 from IPython.display import Latex, Markdown, display

 warnings.filterwarnings('ignore')
 import h5py
 import matplotlib
 import matplotlib.pyplot as plt
 import yaml
 from extra_data import by_id, RunDirectory, stack_detector_data
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from matplotlib.colors import LogNorm

 matplotlib.use("agg")
 %matplotlib inline
 import numpy as np
 import seaborn as sns

 sns.set()
 sns.set_context("paper", font_scale=1.4)
 sns.set_style("ticks")

 import cal_tools.restful_config as rest_cfg
 from cal_tools import agipdalgs as calgs
 from cal_tools.agipdlib import (
    AgipdCorrections,
    AgipdCtrl,
    CellRange,
    LitFrameSelection,
 )
 from cal_tools.ana_tools import get_range
 from cal_tools.calcat_interface import (
    AGIPD_CalibrationData,
    CalCatError,
 )
 from cal_tools.enums import AgipdGainMode, BadPixels
 from cal_tools.plotting import agipd_single_module_geometry
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
+    latex_warning,
    map_modules_from_folder,
    module_index_to_qm,
    write_constants_fragment,
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'

 step_timer = StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Evaluated parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Fill dictionaries comprising bools and arguments for correction and data analysis

 # Here the hierarchy and dependability for correction booleans are defined
 corr_bools = {}

 cs_corr = False
 pc_corr = False
 if rel_gain_mode.lower() == "off":
    # TODO: Remove this part after replacing rel_gain with rel_gain_mode
    if rel_gain:
        pc_corr = True

 elif rel_gain_mode.lower() == "cs":
    cs_corr = True

 elif rel_gain_mode.lower() == "pc":
    pc_corr = True

 else:
    raise ValueError(
        "Selected `rel_gain_mode` is unexpected. "
        "Please select between CS or PC.")

 # offset is at the bottom of AGIPD correction pyramid.
 corr_bools["only_offset"] = only_offset

 # Dont apply any corrections if only_offset is requested
 if not only_offset:
    corr_bools["cs_corr"] = cs_corr
    corr_bools["pc_corr"] = pc_corr
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["zero_nans"] = zero_nans
    corr_bools["zero_orange"] = zero_orange
    corr_bools["mask_noisy_adc"] = mask_noisy_adc
    corr_bools["force_hg_if_below"] = force_hg_if_below
    corr_bools["force_mg_if_below"] = force_mg_if_below
    corr_bools["common_mode"] = common_mode
    corr_bools["melt_snow"] = melt_snow
    corr_bools["mask_zero_std"] = mask_zero_std
    corr_bools["low_medium_gap"] = low_medium_gap
    corr_bools["round_photons"] = round_photons
+    corr_bools["count_lit_pixels"] = count_lit_pixels

 # Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
 disable_for_fixed_gain = [
    "adjust_mg_baseline",
    "blc_set_min",
    "force_hg_if_below",
    "force_mg_if_below",
    "low_medium_gap",
    "melt_snow",
    "pc_corr",
    "cs_corr",
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 if sequences == [-1]:
    sequences = None

 dc = RunDirectory(run_folder)

 ctrl_src = ctrl_source_template.format(karabo_id_control)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)
 index_src = index_source_template.format(karabo_id, receiver_template)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create output folder
 out_folder.mkdir(parents=True, exist_ok=True)

 # Evaluate detector instance for mapping
 instrument = karabo_id.split("_")[0]
 if "AGIPD1M" in karabo_id:
    nmods = 16
 elif "AGIPD500K" in karabo_id:
    nmods = 8
 else:
    nmods = 1

 # Evaluate requested modules
 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    mod_indices = modules if nmods > 1 else [0]
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:  # TODO: fix this with the new CALCAT metadata for module indices.
    modules = [int(x[-2:]) for x in karabo_da]
    mod_indices = modules if nmods > 1 else [0]

 print("Process modules:", ', '.join(module_index_to_qm(x) for x in mod_indices))
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 ```

 %% Cell type:code id: tags:

 ``` python
+train_available = False
+
+for m in modules:
+    try:
+        # Attempt to select the module. If no trains are available, ValueError might be raised
+        if len(dc[instrument_src.format(m), 'image.data'].drop_empty_trains().train_ids) > 0:
+            train_available = True
+            break  # Found a module with available trains.
+    except ValueError as e:
+        warning(f"Missing module {m} from data: {e}")
+
+if not train_available:
+    # Execute this block if no modules with trains were found.
+    latex_warning("No trains available to correct for selected modules.")
+    sys.exit(0)
+```
+
+%% Cell type:code id: tags:
+
+``` python
 if use_ppu_device and use_ppu_device in dc.control_sources:
    # Obtain trains to process if using a pulse picker device and it's present.

    seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()

    # The trains picked are the unique values of trainTrigger.sequenceStart
    # minus the first (previous trigger before this run).
    start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset

    train_ids = []
    for train_id in start_train_ids:
        n_trains = dc[
            use_ppu_device, 'trainTrigger.numberOfTrains'
        ].select_trains(by_id[[train_id]]).ndarray()[0]
        train_ids.extend(list(range(train_id, train_id + n_trains)))

    if train_ids:
        print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
    elif require_ppu_trigger:
        raise RuntimeError(f'PPU device {use_ppu_device} not triggered but required, aborting!')
    else:
        print(f'PPU device {use_ppu_device} not triggered, processing all valid trains')
        train_ids = None

 elif use_ppu_device:
    # PPU configured but not present.

    if require_ppu_trigger:
        raise RuntimeError(f'PPU device {use_ppu_device} required but not found, aborting!')
    else:
        print(f'PPU device {use_ppu_device} configured but not found, processing all valid trains')
        train_ids = None

 elif train_ids != [-1]:
    # Specific trains passed by parameter, convert to ndarray.
    train_ids = np.array(train_ids)

    print(f'Processing up to {len(train_ids)} manually selected train(s)')

 else:
    # No PPU configured.
    print(f'Processing all valid trains')
    train_ids = None
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
 mapped_files, _, total_sequences, _, _ =  map_modules_from_folder(
    str(in_folder), run, path_template, karabo_da, sequences
 )
 file_list = []

 # ToDo: Split table over pages
 print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
 table = []
 ti = 0
 for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["#", "module", "# module", "file"])))
 file_list = sorted(file_list, key=lambda name: name[-10:])
 ```

 %% Cell type:code id: tags:

 ``` python
 first_mod_channel = sorted(modules)[0]
 instrument_src_first_mod = [
    s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]

 agipd_cond = AgipdCtrl(
    run_dc=dc,
    image_src=instrument_src_first_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without gain_setting value
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 # TODO: look for alternative for passing creation_time
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    for m in modules:
        bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control, module=m)
        # Accept non-zero value for a bias voltage from any module.
        if bias_voltage != 0.:
            break
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()
 if gain_mode == -1:
    gain_mode = agipd_cond.get_gain_mode()
 else:
    gain_mode = AgipdGainMode(gain_mode)
 ```

 %% Cell type:code id: tags:

 ``` python
 mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db

 print(f"Maximum memory cells to calibrate: {mem_cells}")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Using {creation_time} as creation time")
 print("Operating conditions are:")
 print(f"• Bias voltage: {bias_voltage}")
 print(f"• Memory cells: {mem_cells_db}")
 print(f"• Acquisition rate: {acq_rate}")
 print(f"• Gain setting: {gain_setting}")
 print(f"• Gain mode: {gain_mode.name}")
 print(f"• Integration time: {integration_time}")
 print(f"• Photon Energy: 9.2")
 ```

 %% Cell type:code id: tags:

 ``` python
 if gain_mode:
    for to_disable in disable_for_fixed_gain:
        if corr_bools.get(to_disable, False):
            warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
            corr_bools[to_disable] = False
 ```

 %% Cell type:code id: tags:

 ``` python
 if use_litframe_finder != 'off':
    from extra_redu import make_litframe_finder, LitFrameFinderError

    if use_litframe_finder not in ['auto', 'offline', 'online']:
        raise ValueError("Unexpected value in 'use_litframe_finder'.")

    inst = karabo_id_control[:3]
    litfrm = make_litframe_finder(inst, dc, litframe_device_id)
    try:
        get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
        r = get_data[use_litframe_finder]()
        cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
+        print(f"{cell_sel.msg()}\n")
        cell_sel.print_report()
+        if np.count_nonzero(r.nLitFrame.value) == 0:  # No lit frames.
+            latex_warning(
+                "No lit frames identified using AGIPD LitFrameFinder."
+                " Offline correction will not be performed.")
+            sys.exit(0)
    except LitFrameFinderError as err:
+        print(cell_sel.msg())
        warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
        cell_sel = CellRange(max_pulses, max_cells=mem_cells)
 else:
    # Use range selection
    cell_sel = CellRange(max_pulses, max_cells=mem_cells)
-
-print(cell_sel.msg())
+    print(cell_sel.msg())
 ```

 %% Cell type:code id: tags:

 ``` python
-if round_photons and photon_energy <= 0.0:
+photon_energy_warn = None
+if photon_energy <= 0.0:
    if use_xgm_device:
        # Try to obtain photon energy from XGM device.
        wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']

        try:
            from scipy.constants import h, c, e

            # Read wavelength as a single value and convert to hv.
            photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
            print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
        except ValueError:
-            warning('XGM source available but photon energy varies greater than 1%, '
-                 'photon rounding disabled!')
-            round_photons = False
+            photon_energy_warn = 'XGM source available but photon energy varies greater than 1%'
    else:
-        warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
-        round_photons = False
-elif round_photons:
-    print(f'Photon energy for rounding: {photon_energy:.3f} keV')
+        photon_energy_warn = 'Neither explicit photon energy nor XGM device configured'

-if round_photons and (rounding_threshold <= .0 or 1. <= rounding_threshold):
-    warning('Round threshould is out of (0, 1) range. Use standard 0.5 value.')
+rounding_threshold_warn = None
+if rounding_threshold <= .0 or 1. <= rounding_threshold:
+    rounding_threshold_warn = 'Round threshold is out of (0, 1) range. Use standard 0.5 value.'
    rounding_threshold = 0.5
+
+if round_photons:
+    if photon_energy_warn:
+        warning(photon_energy_warn + ', photon rounding disabled!')
+        round_photons = False
+    else:
+        print(f'Photon energy for rounding: {photon_energy:.3f} keV')
+        if rounding_threshold_warn:
+            warning(rounding_threshold_warn)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+if count_lit_pixels:
+    if round_photons:
+        data_units = 'photon'
+        litpx_threshold = 1.
+    else:
+        data_units = 'keV'
+        if photon_energy_warn:
+            warning(photon_energy_warn + '. Use 12 keV for lit-pixel counter threshold')
+            litpx_threshold = 12.
+        else:
+            litpx_threshold = photon_energy
+
+        if rounding_threshold_warn:
+            warning(rounding_threshold_warn)
+
+    if not xray_gain:
+        # convert photon energy to ADU (for AGIPD approx. 1 keV = 7 ADU)
+        # it looks that rounding to photons can be applied to data in ADU as well
+        litpx_threshold *= 7.
+        data_units = 'ADU'
+
+    litpx_threshold *= rounding_threshold
+    print(f"Count lit-pixels with signal above {litpx_threshold:.3g} {data_units}")
+else:
+    # dummy value, that is not expected to be used
+    litpx_threshold = 42
 ```

 %% Cell type:code id: tags:

 ``` python
 agipd_corr = AgipdCorrections(
    mem_cells,
    cell_sel,
    h5_data_path=instrument_src,
    h5_index_path=index_src,
    corr_bools=corr_bools,
    gain_mode=gain_mode,
    comp_threads=os.cpu_count() // n_cores_files,
    train_ids=train_ids
 )

 agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
 agipd_corr.hg_hard_threshold = hg_hard_threshold
 agipd_corr.mg_hard_threshold = mg_hard_threshold

 agipd_corr.cm_dark_min = cm_dark_range[0]
 agipd_corr.cm_dark_max = cm_dark_range[1]
 agipd_corr.cm_dark_fraction = cm_dark_fraction
 agipd_corr.cm_n_itr = cm_n_itr
 agipd_corr.noisy_adc_threshold = noisy_adc_threshold
 agipd_corr.ff_gain = ff_gain
 agipd_corr.photon_energy = photon_energy
 agipd_corr.rounding_threshold = rounding_threshold
 agipd_corr.cs_mg_adjust = cs_mg_adjust
+agipd_corr.litpx_threshold = litpx_threshold

 agipd_corr.compress_fields = compress_fields
 if recast_image_data:
    agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving constants

 %% Cell type:code id: tags:

 ``` python
 def get_constants_and_update_metadata(cal_data, main_metadata, constants):
    try:
        metadata = cal_data.metadata(constants)
        for key, value in metadata.items():
            main_metadata.setdefault(key, {}).update(value)
    except CalCatError as e:  # TODO: replace when API errors are improved.
        warning(f"CalCatError: {e}")
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 # Instantiate agipd_cal with the read operating conditions.
 agipd_cal = AGIPD_CalibrationData(
    detector_name=karabo_id,
    modules=karabo_da,
    sensor_bias_voltage=bias_voltage,
    memory_cells=mem_cells,
    acquisition_rate=acq_rate,
    integration_time=integration_time,
    source_energy=9.2,
    gain_mode=gain_mode,
    gain_setting=gain_setting,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
    caldb_root=Path(cal_db_root) if cal_db_root else None,
 )

 # Prepare lists of expected calibrations
 dark_constants = ["Offset", "Noise", "BadPixelsDark"]
 if not gain_mode:  # Adaptive gain
    dark_constants.append("ThresholdsDark")

 agipd_metadata = agipd_cal.metadata(dark_constants)

 agipd_cal.gain_mode = None  # gain_mode is not used for gain constants
 pc_constants, ff_constants, cs_constants = [], [], []

 if agipd_corr.corr_bools.get('xray_corr'):
    ff_constants = list(agipd_cal.illuminated_calibrations)
    get_constants_and_update_metadata(
        agipd_cal, agipd_metadata, ff_constants)

 if any(agipd_corr.relgain_bools):

    if cs_corr:
        # Integration time is not used with CS
        agipd_cal.integration_time = None
        cs_constants = ["SlopesCS", "BadPixelsCS"]
        get_constants_and_update_metadata(
            agipd_cal, agipd_metadata, cs_constants)


    else:  # rel_gain_mode == "pc" or "off"
        pc_constants = ["SlopesPC", "BadPixelsPC"]
        get_constants_and_update_metadata(
            agipd_cal, agipd_metadata, pc_constants)

 step_timer.done_step("Constants were retrieved in")

 relgain_alias = "CS" if cs_corr else "PC"
 print("Preparing constants ("
      f"FF: {agipd_corr.corr_bools.get('xray_corr', False)}, "
      f"{relgain_alias}: {any(agipd_corr.relgain_bools)}, "
      f"BLC: {any(agipd_corr.blc_bools)})")
 # Display retrieved calibration constants timestamps
 agipd_cal.display_markdown_retrieved_constants(metadata=agipd_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate constants availability and exclude modules with no offsets.
 for da, calibrations in agipd_metadata.items():
    mod = modules[karabo_da.index(da)]
    # Constants to error out for when missing.
    error_missing_constants = {"Offset"}
    if not gain_mode:
        error_missing_constants |= {"ThresholdsDark"}

    error_missing_constants -= set(calibrations)
    if error_missing_constants:
        warning(f"Offset constant is not available to correct {da}.")
        # Remove module from files to process.
        del mapped_files[module_index_to_qm(mod)]
        karabo_da.remove(da)
        modules.remove(mod)

    warn_missing_constants = set(dark_constants + pc_constants + ff_constants + cs_constants)
    warn_missing_constants -= error_missing_constants
    warn_missing_constants -= set(calibrations)
    if warn_missing_constants:
        warning(f"Constants {warn_missing_constants} were not retrieved for {da}.")

 if not mapped_files:  # Offsets are missing for all modules.
    raise Exception("Could not find offset constants for any modules, will not correct data.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=agipd_metadata,
    caldb_root=agipd_cal.caldb_root)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Load calibration constants to RAM
 agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))

 def load_constants(da, module):
    """
    Initialize constants data from previously retrieved metadata.

    Args:
        da (str): Data Aggregator (Karabo DA)
        module (int): Module index

    Returns:
        (int, dict, str): Module index, {constant name: creation time}, Karabo DA
    """
    const_data = dict()
    variant = dict()
    for cname, mdata in agipd_metadata[da].items():
        dataset = mdata["dataset"]
        with h5py.File(agipd_cal.caldb_root / mdata["path"], "r") as cf:  # noqa
            const_data[cname] = np.copy(cf[f"{dataset}/data"])
            variant[cname] = cf[dataset].attrs["variant"] if cf[dataset].attrs.keys() else 0  # noqa
    agipd_corr.init_constants(const_data, module, variant)


 step_timer.start()
 with multiprocessing.Pool(processes=len(modules)) as pool:
    pool.starmap(load_constants, zip(karabo_da, modules))
 step_timer.done_step(f'Constants were loaded in ')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Store timestamps for Offset, SlopesPC/SlopesCS, and SlopesFF
 # in YAML file for time-summary table.
 timestamps = {}

 for mod, mod_mdata in agipd_metadata.items():
    modno = int(mod[-2:])

    module_timestamps = {}

    # Store few time stamps if exists
    # Add NA to keep array structure
    for key in ['Offset', f'Slopes{relgain_alias}', 'SlopesFF']:
        if key in mod_mdata:
            module_timestamps[key] = mod_mdata[key]["begin_validity_at"]
        else:
            module_timestamps[key] = "NA"

    timestamps[module_index_to_qm(modno)] = module_timestamps

 seq = sequences[0] if sequences else 0

 with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
    yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
 ```

 %% Cell type:markdown id: tags:

 ## Data processing ##

 %% Cell type:code id: tags:

 ``` python
 # allocate memory for images and hists
 n_images_max = mem_cells * 256
 data_shape = (n_images_max, 512, 128)
 agipd_corr.allocate_images(data_shape, n_cores_files)
 ```

 %% Cell type:code id: tags:

 ``` python
 def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements"""
    start = 0
    while start < len(l):
        yield l[start:start + batch_size]
        start += batch_size
 ```

 %% Cell type:code id: tags:

 ``` python
 def imagewise_chunks(img_counts):
    """Break up the loaded data into chunks of up to chunk_size

    Yields (file data slot, start index, stop index)
    """
-
-
    for i_proc, n_img in enumerate(img_counts):
        n_chunks = math.ceil(n_img / chunk_size)
        for i in range(n_chunks):
            yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
+all_imgs_counts = []
 if max_tasks_per_worker == -1:
    max_tasks_per_worker = None
 with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
    step_timer.done_step('Started pool')

    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files")
        step_timer.start()
        img_counts = pool.starmap(
            agipd_corr.read_file,
            zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
        )
        step_timer.done_step(f'Loading data from files')

-        if img_counts == 0:
+        all_imgs_counts += img_counts
+        if not np.any(img_counts):
            # Skip any further processing and output if there are no images to
            # correct in this file.
            continue

        if mask_zero_std:
            # Evaluate zero-data-std mask
            pool.starmap(
                agipd_corr.mask_zero_std, itertools.product(
                    range(len(file_batch)),
                    np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
                )
            )
            step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # In common mode corrected is enabled.
            # Cell selection is only activated after common mode correction.
            # Perform cross-file correction parallel over asics
            image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                image_files_idx, range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

            img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
            step_timer.done_step("Applying selected cells after common mode correction")

        # Perform image-wise correction"
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Gain corrections")

+        # Peform additional processing
+        if count_lit_pixels:
+            pool.starmap(agipd_corr.litpixel_counter, imagewise_chunks(img_counts))
+            step_timer.done_step("Lit-pixel counting")
+
        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Correction of {len(file_list)} files is finished")
 print(f"Total processing time {step_timer.timespan():.01f} s")
 print(f"Timing summary per batch of {n_cores_files} files:")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
-    print('Skipping plots')
-    import sys
+    print("Skipping plots as configured.")
+    sys.exit(0)
+elif not np.any(all_imgs_counts):
+    latex_warning(f"All sequence files contain no data for correction.")
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis, title=""):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
-    extent = [np.min(edges[1]), np.max(edges[1]),
-              np.min(edges[0]), np.max(edges[0])]
+    extent = np.array(
+        [np.nanmin(edges[1]), np.nanmax(edges[1]),
+         np.nanmin(edges[0]), np.nanmax(edges[0])])
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
                   norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None, mod_starts_at=0):
    """Load single train for all module

    :param data_folder: Path to folder with data
    :param source: Data source to be loaded
    :param include: Inset of file name to be considered
    :param detector_id: The karabo id of the detector to get data for
    :param tid: Train Id to be loaded. First train is considered if None is given
    :param path: Path to find image data inside h5 file
    """
    try:
        run_data = RunDirectory(data_folder, include)
    except FileNotFoundError:
        warning(f'No corrected files for {include}. Skipping plots.')
-        import sys
        sys.exit(0)
    if tid is not None:
        tid, data = run_data.select(
            f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
    else:
        # A first full trainId for all available modules is of interest.
        tid, data = next(run_data.select(
            f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))

    stacked_data = stack_detector_data(
        train=data, data=source, fillvalue=fillvalue, modules=modules,
        starts_at=mod_starts_at,
    )

    return tid, stacked_data
 ```

 %% Cell type:code id: tags:

 ``` python
 if "AGIPD500K" in karabo_id:
    geom = AGIPD_500K2GGeometry.from_origin()
 elif "AGIPD1M" in karabo_id:
    geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
        (-525, 625),
        (-550, -10),
        (520, -160),
        (542.5, 475),
    ])
 else:  # single module AGIPD detector
    geom = agipd_single_module_geometry()
 ```

 %% Cell type:code id: tags:

 ``` python
 include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
 mod_starts_at = 0 if nmods > 1 else modules[0]  # TODO: use CALCAT metadata for the detector.
 tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods, mod_starts_at=mod_starts_at)

 _, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0, mod_starts_at=mod_starts_at)
 _, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
 ```

-%% Cell type:markdown id: tags:
-
-### Signal vs. Analogue Gain ###
-
 %% Cell type:code id: tags:

 ``` python
-raw_float = raw.astype(np.float32)
-signal = raw[:, 0, ...]
+# As part of data reduction efforts, the DAQ now has an option to discard AGIPD gain data
+# when it is known that all data is in the same gain stage. In such cases, the gain data
+# will be set to zeros. Consequently, the signal vs. analog gain 2D histogram can be skipped.
 gain = raw[:, 1, ...]
-hist, bins_x, bins_y = calgs.histogram2d(
-    signal.flatten().astype(np.float32),
-    gain.flatten().astype(np.float32),
-    bins=(100, 100),
-    range=[
-        np.percentile(signal, [0.02, 99.8]),
-        np.percentile(gain, [0.02, 99.8]),
-        ],
+if gain.max() > 0:
+    signal = raw[:, 0, ...]
+    display(Markdown("### Signal vs. Analogue Gain"))
+    hist, bins_x, bins_y = calgs.histogram2d(
+        signal.flatten().astype(np.float32),
+        gain.flatten().astype(np.float32),
+        bins=(100, 100),
+        range=[
+            np.percentile(signal, [0.02, 99.8]),
+            np.percentile(gain, [0.02, 99.8]),
+            ],
    )
-do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
+    do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Digitized Gain ###

 The following plot shows plots signal vs. digitized gain

 %% Cell type:code id: tags:

 ``` python
 vmin, vmax = np.nanmin(corrected), np.nanmax(corrected)
 hist, bins_x, bins_y = calgs.histogram2d(
    corrected.flatten().astype(np.float32),
    gains.flatten().astype(np.float32), bins=(100, 3),
    range=[
        # The range boundaries and decided by DET expert.
        [max(vmin, -50), min(vmax, 8192)],
        [0, 3]
        ],
    )
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Gain statistics in %")
 table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
          f'{gains[gains==1].size/gains.size*100:.03f}',
          f'{gains[gains==2].size/gains.size*100:.03f}']]
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["High", "Medium", "Low"])))
 ```

 %% Cell type:markdown id: tags:

 ### Intensity per Pulse ###

 %% Cell type:code id: tags:

 ``` python
-pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
+pulse_range = [np.nanmin(pulseId[pulseId>=0]), np.nanmax(pulseId[pulseId>=0])]


 def clamp(value, min_value, max_value):
    return max(min_value, min(value, max_value))


 # Modify pulse_range, if only one pulse is selected.
 if pulse_range[0] == pulse_range[1]:
    pulse_range = [0, pulse_range[1]+int(acq_rate)]

 mean_data = np.nanmean(corrected, axis=(2, 3))
-vmin, vmax = mean_data.min(), mean_data.max()
+vmin, vmax = np.nanmin(mean_data), np.nanmax(mean_data)
 hist, bins_x, bins_y = calgs.histogram2d(
    mean_data.flatten().astype(np.float32),
    pulseId.flatten().astype(np.float32),
    bins=(100, int(pulse_range[1])),
    range=[[clamp(vmin, -50, -0.2), min(vmax, 1000)], pulse_range],
 )
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID")

 if vmax > 1000:  # a zoom out plot.
    hist, bins_x, bins_y = calgs.histogram2d(
        mean_data.flatten().astype(np.float32),
        pulseId.flatten().astype(np.float32),
        bins=(100,  int(pulse_range[1])),
        range=[[clamp(vmin, -50, -0.2), min(vmax, 20000)], pulse_range]
    )
    do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID (Extended View)")
 ```

 %% Cell type:markdown id: tags:

 ### Baseline shift ###

 Estimated base-line shift with respect to the total ADU counts of corrected image.

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 h = ax.hist(blshift.flatten(), bins=100, log=True)
 _ = plt.xlabel('Baseline shift [ADU]')
 _ = plt.ylabel('Counts')
 _ = ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(10, 10))
 corrected_ave = np.nansum(corrected, axis=(2, 3))
 plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
 plt.xlim(np.nanpercentile(corrected_ave/10**6, [2, 98]))
 plt.grid()
 plt.xlabel('Illuminated corrected [MADU] ')
 _ = plt.ylabel('Estimated baseline shift [ADU]')
 ```

 %% Cell type:code id: tags:

 ``` python
 if cell_id_preview not in cellId[:, 0]:
    print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
    cell_id_preview = cellId[:, 0][0]
    cell_idx_preview = 0
    print(f"Previewing the first available cellId: {cell_id_preview}.")
 else:
    cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Raw preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown(f'Mean over images of the RAW data\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
    vmin, vmax = np.percentile(data, [5, 95])
    ax = geom.plot_data_fast(data, ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
    pass
 else:
    print("Skipping mean RAW preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = np.percentile(raw[cell_idx_preview, 0, ...], [5, 95])
 ax = geom.plot_data_fast(
    raw[cell_idx_preview, 0, ...], ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Corrected preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown('### Mean CORRECTED Preview ###\n'))
    display(Markdown(f'A mean across train: {tid}\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(corrected, axis=0)
    vmax = np.nanpercentile(data, 99.8)
    ax = geom.plot_data_fast(data, ax=ax, vmin=max(-50, np.nanmin(data)), vmax=vmax, cmap=cmap)
    pass
 else:
    print("Skipping mean CORRECTED preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmax = np.nanpercentile(corrected[cell_idx_preview], 99.8)
 ax = geom.plot_data_fast(
    corrected[cell_idx_preview],
    ax=ax,
    vmin=max(-50, np.nanmin(corrected[cell_idx_preview])),
    vmax=vmax,
    cmap=cmap,
 )
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
 nbins = int((vmax + 50) / 2)
 h = ax.hist(corrected[cell_idx_preview].flatten(),
            bins=nbins, range=(-50, vmax),
            histtype='stepfilled', log=True)
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ax.grid()
 plt.title(f'Log-scaled histogram for corrected data for cell {cell_idx_preview}')
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected, 10, -100)
 vmax = np.nanmax(corrected)
 if vmax > 50000:
    vmax = 50000
 nbins = int((vmax + 100) / 5)
 h = ax.hist(corrected.flatten(), bins=nbins,
            range=(-100, vmax), histtype='step', log=True, label = 'All')
 ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='High gain', color='green')
 ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Medium gain', color='red')
 ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Low gain', color='yellow')
 ax.legend()
 ax.grid()
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 plt.title(f'Overlaid Histograms for corrected data for multiple gains')
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Maximum GAIN Preview ###\n'))
 display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 ax = geom.plot_data_fast(
    np.max(gains, axis=0), ax=ax,
    cmap=cmap, vmin=-0.3, vmax=2.3)  # Extend cmap for wrong gain values.
 pass
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'### Single Shot Bad Pixels ### \n'))
 display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(
    np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap=cmap)
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 if round_photons:
    display(Markdown('### Photonization histograms ###'))

    x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
    x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
    x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)

    fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
    ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
    ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
    ax.set_yscale('log')
    ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
    ax.set_xlim(x_postround[0], x_postround[-1]+1)
    ax.set_xlabel('Photon energy / keV')
    ax.set_ylabel('Intensity')
    ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')

    phx = ax.twiny()
    phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
    phx.set_xticks(x_photons)
    phx.set_xlabel('# Photons')
    pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train  ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(
    np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train. Only Dark Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 cm = np.copy(mask)
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 ax = geom.plot_data_fast(
    np.mean(cm>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
 pass
 ```

 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
 out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
 overwrite = False  # IGNORED, NEEDED FOR COMPATIBILITY.
 modules = [-1] # modules to correct, set to -1 for all, range allowed
 train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
 run = 225 # runs to process, required

 karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 index_source_template = 'INDEX/{}/DET/{}:xtdf/'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device

 slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook

 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_root = ""  # The calibration database root path to access constant files. e.g. accessing constants from the test database /gpfs/exfel/d/cal_tst/caldb_store.

 mem_cells = -1  # Number of memory cells used, set to 0 to automatically infer
 bias_voltage = -1  # bias voltage, set to 0 to use stored value in slow data.
 acq_rate = -1. # the detector acquisition rate, use 0 to try to auto-determine
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
 max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
 mem_cells_db = -1  # set to a value different than 0 to use this value for DB queries
 integration_time = -1 # integration time, negative values for auto-detection.

 # Correction parameters
 blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
 cm_dark_fraction = 0.66 # threshold for fraction of  empty pixels to consider module enough dark to perform CM correction
 cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
 cm_n_itr = 4 # number of iterations for common mode correction
 hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
 mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
 noisy_adc_threshold = 0.25 # threshold to mask complete adc
 ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
 photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
 rounding_threshold = 0.5 # the fraction to round to down, 0.5 for standard rounding rule
 cs_mg_adjust = 7e3  # Value to adjust medium gain when correcting with current source. This is used when `adjust_mg_baseline` is True.

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 # TODO: Remove this boolean parameter an replace rel_gain_mode with it.
 rel_gain = False
 rel_gain_mode = "off"  # Select relative gain correction. Choices [`PC`, `CS`, `off`]. (`PC`: Pulse Capacitor, `CS`: Current Source, `off`: Disable relative gain correction). Default: off.
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 zero_nans = False # set NaN values in corrected data to 0
 zero_orange = False # set to 0 very negative and very large values in corrected data
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
 corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
 force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
 force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
 mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
 common_mode = False # Common mode correction
 melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
 mask_zero_std = False # Mask pixels with zero standard deviation across train
 low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
 round_photons = False  # Round to absolute number of photons, only use with gain corrections

+# Additional processing
+count_lit_pixels = False # Count the number of pixels registering photons
+spi_hitfinding = False  # Find hits using lit-pixel counter
+
 # Optional auxiliary devices
 use_ppu_device = ''  # Device ID for a pulse picker device to only process picked trains, empty string to disable
 ppu_train_offset = 0  # When using the pulse picker, offset between the PPU's sequence start and actually picked train
 require_ppu_trigger = False  # Optional protection against running without PPU or without triggering trains.

 use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
 litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
 energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
 use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction

 use_xgm_device = ''  # DoocsXGM device ID to obtain actual photon energy, operating condition else.

 # Output parameters
 recast_image_data = ''  # Cast data to a different dtype before saving
 compress_fields = ['gain', 'mask']  # Datasets in image group to compress.

+# SPI hit-finder parameters
+spi_hf_modules = [3, 4, 8, 15]  # Use specified modules for SPI hitfinding
+spi_hf_mode = "adaptive"  # The method to compute threshold for hitscores in SPI hitfinding: `fixed` or `adaptive`
+spi_hf_snr = 4.0  # Siginal-to-noise ration for adaptive threshold in SPI hitfinding
+spi_hf_min_scores = 100  # The minimal size of events to compute adaptive threshold in SPI hitfinding
+spi_hf_fixed_threshold = 0  # The fixed threshold value
+spi_hf_hitrate_window_size = 200  # The window size for runnig average of hitrate in trains
+spi_hf_miss_fraction = 1  # The fraction of misses to select along with hits
+spi_hf_miss_fraction_base = "hit"  # The base to compute the number of misses to select: the number of hits (`hit`) or misses (`miss`)
+
 # Plotting parameters
 skip_plots = False # exit after writing corrected files and metadata
 cell_id_preview = 1 # cell Id used for preview in single-shot plots
 cmap = "viridis"  # matplolib.colormap for almost all heatmap. Other options ['plasma', 'inferno', 'magma', 'cividis', 'jet', ...]

 # Parallelization parameters
 chunk_size = 1000  # Size of chunk for image-wise correction
 n_cores_correct = 16 # Number of chunks to be processed in parallel
 n_cores_files = 4 # Number of files to be processed in parallel
 sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
 max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
 max_tasks_per_worker = 1  # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.


 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import math
 import multiprocessing
 import os
+import sys
 import warnings
 from datetime import timedelta
 from logging import warning
 from pathlib import Path

 import tabulate
 from dateutil import parser
 from IPython.display import Latex, Markdown, display

 warnings.filterwarnings('ignore')
 import h5py
 import matplotlib
 import matplotlib.pyplot as plt
 import yaml
 from extra_data import by_id, RunDirectory, stack_detector_data
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from matplotlib.colors import LogNorm

 matplotlib.use("agg")
 %matplotlib inline
 import numpy as np
 import seaborn as sns

 sns.set()
 sns.set_context("paper", font_scale=1.4)
 sns.set_style("ticks")

 import cal_tools.restful_config as rest_cfg
 from cal_tools import agipdalgs as calgs
 from cal_tools.agipdlib import (
    AgipdCorrections,
    AgipdCtrl,
    CellRange,
    LitFrameSelection,
 )
 from cal_tools.ana_tools import get_range
 from cal_tools.calcat_interface import (
    AGIPD_CalibrationData,
    CalCatError,
 )
 from cal_tools.enums import AgipdGainMode, BadPixels
 from cal_tools.plotting import agipd_single_module_geometry
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    calcat_creation_time,
+    latex_warning,
    map_modules_from_folder,
    module_index_to_qm,
    write_constants_fragment,
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'

 step_timer = StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Evaluated parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Fill dictionaries comprising bools and arguments for correction and data analysis

 # Here the hierarchy and dependability for correction booleans are defined
 corr_bools = {}

 cs_corr = False
 pc_corr = False
 if rel_gain_mode.lower() == "off":
    # TODO: Remove this part after replacing rel_gain with rel_gain_mode
    if rel_gain:
        pc_corr = True

 elif rel_gain_mode.lower() == "cs":
    cs_corr = True

 elif rel_gain_mode.lower() == "pc":
    pc_corr = True

 else:
    raise ValueError(
        "Selected `rel_gain_mode` is unexpected. "
        "Please select between CS or PC.")

 # offset is at the bottom of AGIPD correction pyramid.
 corr_bools["only_offset"] = only_offset

 # Dont apply any corrections if only_offset is requested
 if not only_offset:
    corr_bools["cs_corr"] = cs_corr
    corr_bools["pc_corr"] = pc_corr
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["zero_nans"] = zero_nans
    corr_bools["zero_orange"] = zero_orange
    corr_bools["mask_noisy_adc"] = mask_noisy_adc
    corr_bools["force_hg_if_below"] = force_hg_if_below
    corr_bools["force_mg_if_below"] = force_mg_if_below
    corr_bools["common_mode"] = common_mode
    corr_bools["melt_snow"] = melt_snow
    corr_bools["mask_zero_std"] = mask_zero_std
    corr_bools["low_medium_gap"] = low_medium_gap
    corr_bools["round_photons"] = round_photons
+    corr_bools["count_lit_pixels"] = count_lit_pixels

 # Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
 disable_for_fixed_gain = [
    "adjust_mg_baseline",
    "blc_set_min",
    "force_hg_if_below",
    "force_mg_if_below",
    "low_medium_gap",
    "melt_snow",
    "pc_corr",
    "cs_corr",
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 if sequences == [-1]:
    sequences = None

 dc = RunDirectory(run_folder)

 ctrl_src = ctrl_source_template.format(karabo_id_control)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)
 index_src = index_source_template.format(karabo_id, receiver_template)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create output folder
 out_folder.mkdir(parents=True, exist_ok=True)

 # Evaluate detector instance for mapping
 instrument = karabo_id.split("_")[0]
 if "AGIPD1M" in karabo_id:
    nmods = 16
 elif "AGIPD500K" in karabo_id:
    nmods = 8
 else:
    nmods = 1

 # Evaluate requested modules
 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    mod_indices = modules if nmods > 1 else [0]
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:  # TODO: fix this with the new CALCAT metadata for module indices.
    modules = [int(x[-2:]) for x in karabo_da]
    mod_indices = modules if nmods > 1 else [0]

 print("Process modules:", ', '.join(module_index_to_qm(x) for x in mod_indices))
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 ```

 %% Cell type:code id: tags:

 ``` python
+train_available = False
+
+for m in modules:
+    try:
+        # Attempt to select the module. If no trains are available, ValueError might be raised
+        if len(dc[instrument_src.format(m), 'image.data'].drop_empty_trains().train_ids) > 0:
+            train_available = True
+            break  # Found a module with available trains.
+    except ValueError as e:
+        warning(f"Missing module {m} from data: {e}")
+
+if not train_available:
+    # Execute this block if no modules with trains were found.
+    latex_warning("No trains available to correct for selected modules.")
+    sys.exit(0)
+```
+
+%% Cell type:code id: tags:
+
+``` python
 if use_ppu_device and use_ppu_device in dc.control_sources:
    # Obtain trains to process if using a pulse picker device and it's present.

    seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()

    # The trains picked are the unique values of trainTrigger.sequenceStart
    # minus the first (previous trigger before this run).
    start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset

    train_ids = []
    for train_id in start_train_ids:
        n_trains = dc[
            use_ppu_device, 'trainTrigger.numberOfTrains'
        ].select_trains(by_id[[train_id]]).ndarray()[0]
        train_ids.extend(list(range(train_id, train_id + n_trains)))

    if train_ids:
        print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
    elif require_ppu_trigger:
        raise RuntimeError(f'PPU device {use_ppu_device} not triggered but required, aborting!')
    else:
        print(f'PPU device {use_ppu_device} not triggered, processing all valid trains')
        train_ids = None

 elif use_ppu_device:
    # PPU configured but not present.

    if require_ppu_trigger:
        raise RuntimeError(f'PPU device {use_ppu_device} required but not found, aborting!')
    else:
        print(f'PPU device {use_ppu_device} configured but not found, processing all valid trains')
        train_ids = None

 elif train_ids != [-1]:
    # Specific trains passed by parameter, convert to ndarray.
    train_ids = np.array(train_ids)

    print(f'Processing up to {len(train_ids)} manually selected train(s)')

 else:
    # No PPU configured.
    print(f'Processing all valid trains')
    train_ids = None
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
 mapped_files, _, total_sequences, _, _ =  map_modules_from_folder(
    str(in_folder), run, path_template, karabo_da, sequences
 )
 file_list = []

 # ToDo: Split table over pages
 print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
 table = []
 ti = 0
 for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["#", "module", "# module", "file"])))
 file_list = sorted(file_list, key=lambda name: name[-10:])
 ```

 %% Cell type:code id: tags:

 ``` python
 first_mod_channel = sorted(modules)[0]
 instrument_src_first_mod = [
    s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]

 agipd_cond = AgipdCtrl(
    run_dc=dc,
    image_src=instrument_src_first_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without gain_setting value
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 # TODO: look for alternative for passing creation_time
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    for m in modules:
        bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control, module=m)
        # Accept non-zero value for a bias voltage from any module.
        if bias_voltage != 0.:
            break
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()
 if gain_mode == -1:
    gain_mode = agipd_cond.get_gain_mode()
 else:
    gain_mode = AgipdGainMode(gain_mode)
 ```

 %% Cell type:code id: tags:

 ``` python
 mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db

 print(f"Maximum memory cells to calibrate: {mem_cells}")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Using {creation_time} as creation time")
 print("Operating conditions are:")
 print(f"• Bias voltage: {bias_voltage}")
 print(f"• Memory cells: {mem_cells_db}")
 print(f"• Acquisition rate: {acq_rate}")
 print(f"• Gain setting: {gain_setting}")
 print(f"• Gain mode: {gain_mode.name}")
 print(f"• Integration time: {integration_time}")
 print(f"• Photon Energy: 9.2")
 ```

 %% Cell type:code id: tags:

 ``` python
 if gain_mode:
    for to_disable in disable_for_fixed_gain:
        if corr_bools.get(to_disable, False):
            warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
            corr_bools[to_disable] = False
 ```

 %% Cell type:code id: tags:

 ``` python
 if use_litframe_finder != 'off':
    from extra_redu import make_litframe_finder, LitFrameFinderError

    if use_litframe_finder not in ['auto', 'offline', 'online']:
        raise ValueError("Unexpected value in 'use_litframe_finder'.")

    inst = karabo_id_control[:3]
    litfrm = make_litframe_finder(inst, dc, litframe_device_id)
    try:
        get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
        r = get_data[use_litframe_finder]()
        cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
+        print(f"{cell_sel.msg()}\n")
        cell_sel.print_report()
+        if np.count_nonzero(r.nLitFrame.value) == 0:  # No lit frames.
+            latex_warning(
+                "No lit frames identified using AGIPD LitFrameFinder."
+                " Offline correction will not be performed.")
+            sys.exit(0)
    except LitFrameFinderError as err:
+        print(cell_sel.msg())
        warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
        cell_sel = CellRange(max_pulses, max_cells=mem_cells)
 else:
    # Use range selection
    cell_sel = CellRange(max_pulses, max_cells=mem_cells)
-
-print(cell_sel.msg())
+    print(cell_sel.msg())
 ```

 %% Cell type:code id: tags:

 ``` python
-if round_photons and photon_energy <= 0.0:
+photon_energy_warn = None
+if photon_energy <= 0.0:
    if use_xgm_device:
        # Try to obtain photon energy from XGM device.
        wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']

        try:
            from scipy.constants import h, c, e

            # Read wavelength as a single value and convert to hv.
            photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
            print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
        except ValueError:
-            warning('XGM source available but photon energy varies greater than 1%, '
-                 'photon rounding disabled!')
-            round_photons = False
+            photon_energy_warn = 'XGM source available but photon energy varies greater than 1%'
    else:
-        warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
-        round_photons = False
-elif round_photons:
-    print(f'Photon energy for rounding: {photon_energy:.3f} keV')
+        photon_energy_warn = 'Neither explicit photon energy nor XGM device configured'

-if round_photons and (rounding_threshold <= .0 or 1. <= rounding_threshold):
-    warning('Round threshould is out of (0, 1) range. Use standard 0.5 value.')
+rounding_threshold_warn = None
+if rounding_threshold <= .0 or 1. <= rounding_threshold:
+    rounding_threshold_warn = 'Round threshold is out of (0, 1) range. Use standard 0.5 value.'
    rounding_threshold = 0.5
+
+if round_photons:
+    if photon_energy_warn:
+        warning(photon_energy_warn + ', photon rounding disabled!')
+        round_photons = False
+    else:
+        print(f'Photon energy for rounding: {photon_energy:.3f} keV')
+        if rounding_threshold_warn:
+            warning(rounding_threshold_warn)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+if count_lit_pixels:
+    if round_photons:
+        data_units = 'photon'
+        litpx_threshold = 1.
+    else:
+        data_units = 'keV'
+        if photon_energy_warn:
+            warning(photon_energy_warn + '. Use 12 keV for lit-pixel counter threshold')
+            litpx_threshold = 12.
+        else:
+            litpx_threshold = photon_energy
+
+        if rounding_threshold_warn:
+            warning(rounding_threshold_warn)
+
+    if not xray_gain:
+        # convert photon energy to ADU (for AGIPD approx. 1 keV = 7 ADU)
+        # it looks that rounding to photons can be applied to data in ADU as well
+        litpx_threshold *= 7.
+        data_units = 'ADU'
+
+    litpx_threshold *= rounding_threshold
+    print(f"Count lit-pixels with signal above {litpx_threshold:.3g} {data_units}")
+else:
+    # dummy value, that is not expected to be used
+    litpx_threshold = 42
 ```

 %% Cell type:code id: tags:

 ``` python
 agipd_corr = AgipdCorrections(
    mem_cells,
    cell_sel,
    h5_data_path=instrument_src,
    h5_index_path=index_src,
    corr_bools=corr_bools,
    gain_mode=gain_mode,
    comp_threads=os.cpu_count() // n_cores_files,
    train_ids=train_ids
 )

 agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
 agipd_corr.hg_hard_threshold = hg_hard_threshold
 agipd_corr.mg_hard_threshold = mg_hard_threshold

 agipd_corr.cm_dark_min = cm_dark_range[0]
 agipd_corr.cm_dark_max = cm_dark_range[1]
 agipd_corr.cm_dark_fraction = cm_dark_fraction
 agipd_corr.cm_n_itr = cm_n_itr
 agipd_corr.noisy_adc_threshold = noisy_adc_threshold
 agipd_corr.ff_gain = ff_gain
 agipd_corr.photon_energy = photon_energy
 agipd_corr.rounding_threshold = rounding_threshold
 agipd_corr.cs_mg_adjust = cs_mg_adjust
+agipd_corr.litpx_threshold = litpx_threshold

 agipd_corr.compress_fields = compress_fields
 if recast_image_data:
    agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving constants

 %% Cell type:code id: tags:

 ``` python
 def get_constants_and_update_metadata(cal_data, main_metadata, constants):
    try:
        metadata = cal_data.metadata(constants)
        for key, value in metadata.items():
            main_metadata.setdefault(key, {}).update(value)
    except CalCatError as e:  # TODO: replace when API errors are improved.
        warning(f"CalCatError: {e}")
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 # Instantiate agipd_cal with the read operating conditions.
 agipd_cal = AGIPD_CalibrationData(
    detector_name=karabo_id,
    modules=karabo_da,
    sensor_bias_voltage=bias_voltage,
    memory_cells=mem_cells,
    acquisition_rate=acq_rate,
    integration_time=integration_time,
    source_energy=9.2,
    gain_mode=gain_mode,
    gain_setting=gain_setting,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
    caldb_root=Path(cal_db_root) if cal_db_root else None,
 )

 # Prepare lists of expected calibrations
 dark_constants = ["Offset", "Noise", "BadPixelsDark"]
 if not gain_mode:  # Adaptive gain
    dark_constants.append("ThresholdsDark")

 agipd_metadata = agipd_cal.metadata(dark_constants)

 agipd_cal.gain_mode = None  # gain_mode is not used for gain constants
 pc_constants, ff_constants, cs_constants = [], [], []

 if agipd_corr.corr_bools.get('xray_corr'):
    ff_constants = list(agipd_cal.illuminated_calibrations)
    get_constants_and_update_metadata(
        agipd_cal, agipd_metadata, ff_constants)

 if any(agipd_corr.relgain_bools):

    if cs_corr:
        # Integration time is not used with CS
        agipd_cal.integration_time = None
        cs_constants = ["SlopesCS", "BadPixelsCS"]
        get_constants_and_update_metadata(
            agipd_cal, agipd_metadata, cs_constants)


    else:  # rel_gain_mode == "pc" or "off"
        pc_constants = ["SlopesPC", "BadPixelsPC"]
        get_constants_and_update_metadata(
            agipd_cal, agipd_metadata, pc_constants)

 step_timer.done_step("Constants were retrieved in")

 relgain_alias = "CS" if cs_corr else "PC"
 print("Preparing constants ("
      f"FF: {agipd_corr.corr_bools.get('xray_corr', False)}, "
      f"{relgain_alias}: {any(agipd_corr.relgain_bools)}, "
      f"BLC: {any(agipd_corr.blc_bools)})")
 # Display retrieved calibration constants timestamps
 agipd_cal.display_markdown_retrieved_constants(metadata=agipd_metadata)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Validate constants availability and exclude modules with no offsets.
 for da, calibrations in agipd_metadata.items():
    mod = modules[karabo_da.index(da)]
    # Constants to error out for when missing.
    error_missing_constants = {"Offset"}
    if not gain_mode:
        error_missing_constants |= {"ThresholdsDark"}

    error_missing_constants -= set(calibrations)
    if error_missing_constants:
        warning(f"Offset constant is not available to correct {da}.")
        # Remove module from files to process.
        del mapped_files[module_index_to_qm(mod)]
        karabo_da.remove(da)
        modules.remove(mod)

    warn_missing_constants = set(dark_constants + pc_constants + ff_constants + cs_constants)
    warn_missing_constants -= error_missing_constants
    warn_missing_constants -= set(calibrations)
    if warn_missing_constants:
        warning(f"Constants {warn_missing_constants} were not retrieved for {da}.")

 if not mapped_files:  # Offsets are missing for all modules.
    raise Exception("Could not find offset constants for any modules, will not correct data.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Record constant details in YAML metadata
 write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=agipd_metadata,
    caldb_root=agipd_cal.caldb_root)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Load calibration constants to RAM
 agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))

 def load_constants(da, module):
    """
    Initialize constants data from previously retrieved metadata.

    Args:
        da (str): Data Aggregator (Karabo DA)
        module (int): Module index

    Returns:
        (int, dict, str): Module index, {constant name: creation time}, Karabo DA
    """
    const_data = dict()
    variant = dict()
    for cname, mdata in agipd_metadata[da].items():
        dataset = mdata["dataset"]
        with h5py.File(agipd_cal.caldb_root / mdata["path"], "r") as cf:  # noqa
            const_data[cname] = np.copy(cf[f"{dataset}/data"])
            variant[cname] = cf[dataset].attrs["variant"] if cf[dataset].attrs.keys() else 0  # noqa
    agipd_corr.init_constants(const_data, module, variant)


 step_timer.start()
 with multiprocessing.Pool(processes=len(modules)) as pool:
    pool.starmap(load_constants, zip(karabo_da, modules))
 step_timer.done_step(f'Constants were loaded in ')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Store timestamps for Offset, SlopesPC/SlopesCS, and SlopesFF
 # in YAML file for time-summary table.
 timestamps = {}

 for mod, mod_mdata in agipd_metadata.items():
    modno = int(mod[-2:])

    module_timestamps = {}

    # Store few time stamps if exists
    # Add NA to keep array structure
    for key in ['Offset', f'Slopes{relgain_alias}', 'SlopesFF']:
        if key in mod_mdata:
            module_timestamps[key] = mod_mdata[key]["begin_validity_at"]
        else:
            module_timestamps[key] = "NA"

    timestamps[module_index_to_qm(modno)] = module_timestamps

 seq = sequences[0] if sequences else 0

 with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
    yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
 ```

 %% Cell type:markdown id: tags:

 ## Data processing ##

 %% Cell type:code id: tags:

 ``` python
 # allocate memory for images and hists
 n_images_max = mem_cells * 256
 data_shape = (n_images_max, 512, 128)
 agipd_corr.allocate_images(data_shape, n_cores_files)
 ```

 %% Cell type:code id: tags:

 ``` python
 def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements"""
    start = 0
    while start < len(l):
        yield l[start:start + batch_size]
        start += batch_size
 ```

 %% Cell type:code id: tags:

 ``` python
 def imagewise_chunks(img_counts):
    """Break up the loaded data into chunks of up to chunk_size

    Yields (file data slot, start index, stop index)
    """
-
-
    for i_proc, n_img in enumerate(img_counts):
        n_chunks = math.ceil(n_img / chunk_size)
        for i in range(n_chunks):
            yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
+all_imgs_counts = []
 if max_tasks_per_worker == -1:
    max_tasks_per_worker = None
 with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
    step_timer.done_step('Started pool')

    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files")
        step_timer.start()
        img_counts = pool.starmap(
            agipd_corr.read_file,
            zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
        )
        step_timer.done_step(f'Loading data from files')

-        if img_counts == 0:
+        all_imgs_counts += img_counts
+        if not np.any(img_counts):
            # Skip any further processing and output if there are no images to
            # correct in this file.
            continue

        if mask_zero_std:
            # Evaluate zero-data-std mask
            pool.starmap(
                agipd_corr.mask_zero_std, itertools.product(
                    range(len(file_batch)),
                    np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
                )
            )
            step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # In common mode corrected is enabled.
            # Cell selection is only activated after common mode correction.
            # Perform cross-file correction parallel over asics
            image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                image_files_idx, range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

            img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
            step_timer.done_step("Applying selected cells after common mode correction")

        # Perform image-wise correction"
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Gain corrections")

+        # Peform additional processing
+        if count_lit_pixels:
+            pool.starmap(agipd_corr.litpixel_counter, imagewise_chunks(img_counts))
+            step_timer.done_step("Lit-pixel counting")
+
        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Correction of {len(file_list)} files is finished")
 print(f"Total processing time {step_timer.timespan():.01f} s")
 print(f"Timing summary per batch of {n_cores_files} files:")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
-    print('Skipping plots')
-    import sys
+    print("Skipping plots as configured.")
+    sys.exit(0)
+elif not np.any(all_imgs_counts):
+    latex_warning(f"All sequence files contain no data for correction.")
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis, title=""):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
-    extent = [np.min(edges[1]), np.max(edges[1]),
-              np.min(edges[0]), np.max(edges[0])]
+    extent = np.array(
+        [np.nanmin(edges[1]), np.nanmax(edges[1]),
+         np.nanmin(edges[0]), np.nanmax(edges[0])])
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
                   norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None, mod_starts_at=0):
    """Load single train for all module

    :param data_folder: Path to folder with data
    :param source: Data source to be loaded
    :param include: Inset of file name to be considered
    :param detector_id: The karabo id of the detector to get data for
    :param tid: Train Id to be loaded. First train is considered if None is given
    :param path: Path to find image data inside h5 file
    """
    try:
        run_data = RunDirectory(data_folder, include)
    except FileNotFoundError:
        warning(f'No corrected files for {include}. Skipping plots.')
-        import sys
        sys.exit(0)
    if tid is not None:
        tid, data = run_data.select(
            f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
    else:
        # A first full trainId for all available modules is of interest.
        tid, data = next(run_data.select(
            f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))

    stacked_data = stack_detector_data(
        train=data, data=source, fillvalue=fillvalue, modules=modules,
        starts_at=mod_starts_at,
    )

    return tid, stacked_data
 ```

 %% Cell type:code id: tags:

 ``` python
 if "AGIPD500K" in karabo_id:
    geom = AGIPD_500K2GGeometry.from_origin()
 elif "AGIPD1M" in karabo_id:
    geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
        (-525, 625),
        (-550, -10),
        (520, -160),
        (542.5, 475),
    ])
 else:  # single module AGIPD detector
    geom = agipd_single_module_geometry()
 ```

 %% Cell type:code id: tags:

 ``` python
 include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
 mod_starts_at = 0 if nmods > 1 else modules[0]  # TODO: use CALCAT metadata for the detector.
 tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods, mod_starts_at=mod_starts_at)

 _, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 _, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0, mod_starts_at=mod_starts_at)
 _, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods, mod_starts_at=mod_starts_at)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
 ```

-%% Cell type:markdown id: tags:
-
-### Signal vs. Analogue Gain ###
-
 %% Cell type:code id: tags:

 ``` python
-raw_float = raw.astype(np.float32)
-signal = raw[:, 0, ...]
+# As part of data reduction efforts, the DAQ now has an option to discard AGIPD gain data
+# when it is known that all data is in the same gain stage. In such cases, the gain data
+# will be set to zeros. Consequently, the signal vs. analog gain 2D histogram can be skipped.
 gain = raw[:, 1, ...]
-hist, bins_x, bins_y = calgs.histogram2d(
-    signal.flatten().astype(np.float32),
-    gain.flatten().astype(np.float32),
-    bins=(100, 100),
-    range=[
-        np.percentile(signal, [0.02, 99.8]),
-        np.percentile(gain, [0.02, 99.8]),
-        ],
+if gain.max() > 0:
+    signal = raw[:, 0, ...]
+    display(Markdown("### Signal vs. Analogue Gain"))
+    hist, bins_x, bins_y = calgs.histogram2d(
+        signal.flatten().astype(np.float32),
+        gain.flatten().astype(np.float32),
+        bins=(100, 100),
+        range=[
+            np.percentile(signal, [0.02, 99.8]),
+            np.percentile(gain, [0.02, 99.8]),
+            ],
    )
-do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
+    do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Digitized Gain ###

 The following plot shows plots signal vs. digitized gain

 %% Cell type:code id: tags:

 ``` python
 vmin, vmax = np.nanmin(corrected), np.nanmax(corrected)
 hist, bins_x, bins_y = calgs.histogram2d(
    corrected.flatten().astype(np.float32),
    gains.flatten().astype(np.float32), bins=(100, 3),
    range=[
        # The range boundaries and decided by DET expert.
        [max(vmin, -50), min(vmax, 8192)],
        [0, 3]
        ],
    )
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Gain statistics in %")
 table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
          f'{gains[gains==1].size/gains.size*100:.03f}',
          f'{gains[gains==2].size/gains.size*100:.03f}']]
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["High", "Medium", "Low"])))
 ```

 %% Cell type:markdown id: tags:

 ### Intensity per Pulse ###

 %% Cell type:code id: tags:

 ``` python
-pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
+pulse_range = [np.nanmin(pulseId[pulseId>=0]), np.nanmax(pulseId[pulseId>=0])]


 def clamp(value, min_value, max_value):
    return max(min_value, min(value, max_value))


 # Modify pulse_range, if only one pulse is selected.
 if pulse_range[0] == pulse_range[1]:
    pulse_range = [0, pulse_range[1]+int(acq_rate)]

 mean_data = np.nanmean(corrected, axis=(2, 3))
-vmin, vmax = mean_data.min(), mean_data.max()
+vmin, vmax = np.nanmin(mean_data), np.nanmax(mean_data)
 hist, bins_x, bins_y = calgs.histogram2d(
    mean_data.flatten().astype(np.float32),
    pulseId.flatten().astype(np.float32),
    bins=(100, int(pulse_range[1])),
    range=[[clamp(vmin, -50, -0.2), min(vmax, 1000)], pulse_range],
 )
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID")

 if vmax > 1000:  # a zoom out plot.
    hist, bins_x, bins_y = calgs.histogram2d(
        mean_data.flatten().astype(np.float32),
        pulseId.flatten().astype(np.float32),
        bins=(100,  int(pulse_range[1])),
        range=[[clamp(vmin, -50, -0.2), min(vmax, 20000)], pulse_range]
    )
    do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id", title="Signal-Pulse ID (Extended View)")
 ```

 %% Cell type:markdown id: tags:

 ### Baseline shift ###

 Estimated base-line shift with respect to the total ADU counts of corrected image.

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 h = ax.hist(blshift.flatten(), bins=100, log=True)
 _ = plt.xlabel('Baseline shift [ADU]')
 _ = plt.ylabel('Counts')
 _ = ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(10, 10))
 corrected_ave = np.nansum(corrected, axis=(2, 3))
 plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
 plt.xlim(np.nanpercentile(corrected_ave/10**6, [2, 98]))
 plt.grid()
 plt.xlabel('Illuminated corrected [MADU] ')
 _ = plt.ylabel('Estimated baseline shift [ADU]')
 ```

 %% Cell type:code id: tags:

 ``` python
 if cell_id_preview not in cellId[:, 0]:
    print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
    cell_id_preview = cellId[:, 0][0]
    cell_idx_preview = 0
    print(f"Previewing the first available cellId: {cell_id_preview}.")
 else:
    cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Raw preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown(f'Mean over images of the RAW data\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
    vmin, vmax = np.percentile(data, [5, 95])
    ax = geom.plot_data_fast(data, ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
    pass
 else:
    print("Skipping mean RAW preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = np.percentile(raw[cell_idx_preview, 0, ...], [5, 95])
 ax = geom.plot_data_fast(
    raw[cell_idx_preview, 0, ...], ax=ax, vmin=vmin, vmax=vmax, cmap=cmap)
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Corrected preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown('### Mean CORRECTED Preview ###\n'))
    display(Markdown(f'A mean across train: {tid}\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(corrected, axis=0)
    vmax = np.nanpercentile(data, 99.8)
    ax = geom.plot_data_fast(data, ax=ax, vmin=max(-50, np.nanmin(data)), vmax=vmax, cmap=cmap)
    pass
 else:
    print("Skipping mean CORRECTED preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmax = np.nanpercentile(corrected[cell_idx_preview], 99.8)
 ax = geom.plot_data_fast(
    corrected[cell_idx_preview],
    ax=ax,
    vmin=max(-50, np.nanmin(corrected[cell_idx_preview])),
    vmax=vmax,
    cmap=cmap,
 )
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
 nbins = int((vmax + 50) / 2)
 h = ax.hist(corrected[cell_idx_preview].flatten(),
            bins=nbins, range=(-50, vmax),
            histtype='stepfilled', log=True)
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ax.grid()
 plt.title(f'Log-scaled histogram for corrected data for cell {cell_idx_preview}')
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected, 10, -100)
 vmax = np.nanmax(corrected)
 if vmax > 50000:
    vmax = 50000
 nbins = int((vmax + 100) / 5)
 h = ax.hist(corrected.flatten(), bins=nbins,
            range=(-100, vmax), histtype='step', log=True, label = 'All')
 ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='High gain', color='green')
 ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Medium gain', color='red')
 ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Low gain', color='yellow')
 ax.legend()
 ax.grid()
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 plt.title(f'Overlaid Histograms for corrected data for multiple gains')
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Maximum GAIN Preview ###\n'))
 display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 ax = geom.plot_data_fast(
    np.max(gains, axis=0), ax=ax,
    cmap=cmap, vmin=-0.3, vmax=2.3)  # Extend cmap for wrong gain values.
 pass
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'### Single Shot Bad Pixels ### \n'))
 display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(
    np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap=cmap)
 pass
 ```

 %% Cell type:code id: tags:

 ``` python
 if round_photons:
    display(Markdown('### Photonization histograms ###'))

    x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
    x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
    x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)

    fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
    ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
    ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
    ax.set_yscale('log')
    ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
    ax.set_xlim(x_postround[0], x_postround[-1]+1)
    ax.set_xlabel('Photon energy / keV')
    ax.set_ylabel('Intensity')
    ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')

    phx = ax.twiny()
    phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
    phx.set_xticks(x_photons)
    phx.set_xlabel('# Photons')
    pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train  ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(
    np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
 pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train. Only Dark Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 cm = np.copy(mask)
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 ax = geom.plot_data_fast(
    np.mean(cm>0, axis=0), vmin=0, ax=ax, vmax=1, cmap=cmap)
 pass
 ```

--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify_Summary_NBC.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify_Summary_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Summary of the AGIPD offline correction #

 %% Cell type:code id: tags:

 ``` python
 run = 11 # runs to process, required
 out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_Corr" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
 modules = [-1]
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 rel_gain_mode = "off"  # Select relative gain correction. Choices [`PC`, `CS`, `off`]. (`PC`: Pulse Capacitor, `CS`: Current Source, `off`: Disable relative gain correction). Default: off.
+# Additional processing
+count_lit_pixels = False  # Count the number of pixels registering photons
+spi_hitfinding = False  # Find hits using lit-pixel counter
+
+# SPI hit-finder parameters
+spi_hf_modules = [3, 4, 8, 15]  # Use specified modules for SPI hitfinding
+spi_hf_mode = "adaptive"  # The method to compute threshold for hitscores in SPI hitfinding: `fixed` or `adaptive`
+spi_hf_snr = 4.0  # Siginal-to-noise ration for adaptive threshold in SPI hitfinding
+spi_hf_min_scores = 100  # The minimal size of events to compute adaptive threshold in SPI hitfinding
+spi_hf_fixed_threshold = 0  # The fixed threshold value
+spi_hf_hitrate_window_size = 200  # The window size for runnig average of hitrate in trains
+spi_hf_miss_fraction = 1  # The fraction of misses to select along with hits
+spi_hf_miss_fraction_base = "hit"  # The base to compute the number of misses to select: the number of hits (`hit`) or misses (`miss`)
 ```

 %% Cell type:code id: tags:

 ``` python
 from pathlib import Path
+from logging import warning

 import yaml

 import tabulate
 from cal_tools.tools import CalibrationMetadata
-from IPython.display import Latex, display
+from IPython.display import Latex, display, Markdown
+
+import matplotlib
+import matplotlib.pyplot as plt
+matplotlib.use("agg")
+%matplotlib inline
+
+from extra_data import RunDirectory
+from extra_redu.fileutils import StackedPulseSource, exdf_save
+from extra_redu.spi import SpiHitfinder
 ```

 %% Cell type:code id: tags:

 ``` python
 out_folder = Path(out_folder)
 metadata = CalibrationMetadata(metadata_folder or out_folder)
 const_dict = metadata.setdefault("retrieved-constants", {})
 time_dict = const_dict.setdefault("time-summary", {})

 # Extracting Instrument string
 instrument = karabo_id.split("_")[0]
 # Evaluate detector instance for mapping
 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8

 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:
    modules = [int(x[-2:]) for x in karabo_da]

 # This is needed only if AGIPD Correction notebook had no precorrection notebooks for retrieving constants
 # gather all generated sequence yml files for time summary of retrieved constant under retrieved-constants in metadata.yml
 for fn in sorted(out_folder.glob("retrieved_constants_*.yml")):
    with fn.open("r") as fd:
        fdict = yaml.safe_load(fd)
    # append different sequences' time summary to the main yaml
    time_dict.update(fdict["time-summary"])
    fn.unlink()

 metadata.save()
 ```

 %% Cell type:code id: tags:

 ``` python
 def print_const_table(const):
    print(f"{const} constants were injected on:")
    table_data = {}
    for seq, mod_data in time_dict.items():
        for mod, const_data in mod_data.items():
            timestamp = const_data[const]
            table_data.setdefault(timestamp, []).append(f"{seq}:{mod}")
    table = []
    if not len(table_data):
        table.append(["No constants retrieved"])
    elif len(table_data) == 1:
        table.append([[*table_data][0], "All modules"])
    else:
        for timestamp, seqmod in table_data.items():
            table.append([timestamp, seqmod[0]])
            for further_module in seqmod[1:]:
                table.append(["", further_module])
    display(Latex(tabulate.tabulate(table,
                 tablefmt="latex",
                 headers=["Timestamps", "Modules and sequences"])))
 rel_gain_alias = "CS" if rel_gain_mode.lower() == "cs" else "PC"  # 'off' or 'pc'
 for const in ['Offset', f'Slopes{rel_gain_alias}', 'SlopesFF']:
    print_const_table(const)
 ```
+
+%% Cell type:code id: tags:
+
+``` python
+if spi_hitfinding and not count_lit_pixels:
+    # Operators are not expected to enable SPI hitfidnig without lit-pixels
+    # counting. If this happens, warn on the mismatch in the configuration.
+    warning("SPI hitfinding will be skipped because the required lit-pixel "
+            "counting is disabled. To run hitfinding, enable also lit-pixel "
+            "counting with the `--count-lit-pixels` option.")
+
+if spi_hitfinding and count_lit_pixels:
+    display(Markdown("# SPI hit finding"))
+
+    dc = RunDirectory(out_folder)
+    litpx_src = StackedPulseSource.from_datacollection(
+        dc, f"{karabo_id}/CORR/(?P<key>\d+)CH0:output", "litpx")
+
+    hitfinder = SpiHitfinder(
+        modules=spi_hf_modules,
+        mode=spi_hf_mode,
+        snr=spi_hf_snr,
+        min_scores=spi_hf_min_scores,
+        fixed_threshold=spi_hf_fixed_threshold,
+        hitrate_window_size=spi_hf_hitrate_window_size,
+        miss_fraction=spi_hf_miss_fraction,
+        miss_fraction_base=spi_hf_miss_fraction_base,
+    )
+    hitfinder.find_hits(litpx_src)
+
+    # write hit-finder data in file
+    sources = {
+        f"{karabo_id}/REDU/SPI_HITFINDER": hitfinder,
+    }
+    exdf_save(out_folder, "REDU00", run, sources, sequence_size=3500)
+
+    # draw plots
+    display(Markdown("## Hit-rate plot"))
+    hitfinder.plot_hitrate()
+    plt.show()
+
+    display(Markdown("## Hitscore histogram"))
+    hitfinder.plot_hitscore_hist()
+    plt.show()
+
+    display(Markdown("## Hitscore plots"))
+    hitfinder.plot_hitscore()
+```

 %% Cell type:markdown id: tags:

 # Summary of the AGIPD offline correction #

 %% Cell type:code id: tags:

 ``` python
 run = 11 # runs to process, required
 out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_Corr" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
 modules = [-1]
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 rel_gain_mode = "off"  # Select relative gain correction. Choices [`PC`, `CS`, `off`]. (`PC`: Pulse Capacitor, `CS`: Current Source, `off`: Disable relative gain correction). Default: off.
+# Additional processing
+count_lit_pixels = False  # Count the number of pixels registering photons
+spi_hitfinding = False  # Find hits using lit-pixel counter
+
+# SPI hit-finder parameters
+spi_hf_modules = [3, 4, 8, 15]  # Use specified modules for SPI hitfinding
+spi_hf_mode = "adaptive"  # The method to compute threshold for hitscores in SPI hitfinding: `fixed` or `adaptive`
+spi_hf_snr = 4.0  # Siginal-to-noise ration for adaptive threshold in SPI hitfinding
+spi_hf_min_scores = 100  # The minimal size of events to compute adaptive threshold in SPI hitfinding
+spi_hf_fixed_threshold = 0  # The fixed threshold value
+spi_hf_hitrate_window_size = 200  # The window size for runnig average of hitrate in trains
+spi_hf_miss_fraction = 1  # The fraction of misses to select along with hits
+spi_hf_miss_fraction_base = "hit"  # The base to compute the number of misses to select: the number of hits (`hit`) or misses (`miss`)
 ```

 %% Cell type:code id: tags:

 ``` python
 from pathlib import Path
+from logging import warning

 import yaml

 import tabulate
 from cal_tools.tools import CalibrationMetadata
-from IPython.display import Latex, display
+from IPython.display import Latex, display, Markdown
+
+import matplotlib
+import matplotlib.pyplot as plt
+matplotlib.use("agg")
+%matplotlib inline
+
+from extra_data import RunDirectory
+from extra_redu.fileutils import StackedPulseSource, exdf_save
+from extra_redu.spi import SpiHitfinder
 ```

 %% Cell type:code id: tags:

 ``` python
 out_folder = Path(out_folder)
 metadata = CalibrationMetadata(metadata_folder or out_folder)
 const_dict = metadata.setdefault("retrieved-constants", {})
 time_dict = const_dict.setdefault("time-summary", {})

 # Extracting Instrument string
 instrument = karabo_id.split("_")[0]
 # Evaluate detector instance for mapping
 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8

 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:
    modules = [int(x[-2:]) for x in karabo_da]

 # This is needed only if AGIPD Correction notebook had no precorrection notebooks for retrieving constants
 # gather all generated sequence yml files for time summary of retrieved constant under retrieved-constants in metadata.yml
 for fn in sorted(out_folder.glob("retrieved_constants_*.yml")):
    with fn.open("r") as fd:
        fdict = yaml.safe_load(fd)
    # append different sequences' time summary to the main yaml
    time_dict.update(fdict["time-summary"])
    fn.unlink()

 metadata.save()
 ```

 %% Cell type:code id: tags:

 ``` python
 def print_const_table(const):
    print(f"{const} constants were injected on:")
    table_data = {}
    for seq, mod_data in time_dict.items():
        for mod, const_data in mod_data.items():
            timestamp = const_data[const]
            table_data.setdefault(timestamp, []).append(f"{seq}:{mod}")
    table = []
    if not len(table_data):
        table.append(["No constants retrieved"])
    elif len(table_data) == 1:
        table.append([[*table_data][0], "All modules"])
    else:
        for timestamp, seqmod in table_data.items():
            table.append([timestamp, seqmod[0]])
            for further_module in seqmod[1:]:
                table.append(["", further_module])
    display(Latex(tabulate.tabulate(table,
                 tablefmt="latex",
                 headers=["Timestamps", "Modules and sequences"])))
 rel_gain_alias = "CS" if rel_gain_mode.lower() == "cs" else "PC"  # 'off' or 'pc'
 for const in ['Offset', f'Slopes{rel_gain_alias}', 'SlopesFF']:
    print_const_table(const)
 ```
+
+%% Cell type:code id: tags:
+
+``` python
+if spi_hitfinding and not count_lit_pixels:
+    # Operators are not expected to enable SPI hitfidnig without lit-pixels
+    # counting. If this happens, warn on the mismatch in the configuration.
+    warning("SPI hitfinding will be skipped because the required lit-pixel "
+            "counting is disabled. To run hitfinding, enable also lit-pixel "
+            "counting with the `--count-lit-pixels` option.")
+
+if spi_hitfinding and count_lit_pixels:
+    display(Markdown("# SPI hit finding"))
+
+    dc = RunDirectory(out_folder)
+    litpx_src = StackedPulseSource.from_datacollection(
+        dc, f"{karabo_id}/CORR/(?P<key>\d+)CH0:output", "litpx")
+
+    hitfinder = SpiHitfinder(
+        modules=spi_hf_modules,
+        mode=spi_hf_mode,
+        snr=spi_hf_snr,
+        min_scores=spi_hf_min_scores,
+        fixed_threshold=spi_hf_fixed_threshold,
+        hitrate_window_size=spi_hf_hitrate_window_size,
+        miss_fraction=spi_hf_miss_fraction,
+        miss_fraction_base=spi_hf_miss_fraction_base,
+    )
+    hitfinder.find_hits(litpx_src)
+
+    # write hit-finder data in file
+    sources = {
+        f"{karabo_id}/REDU/SPI_HITFINDER": hitfinder,
+    }
+    exdf_save(out_folder, "REDU00", run, sources, sequence_size=3500)
+
+    # draw plots
+    display(Markdown("## Hit-rate plot"))
+    hitfinder.plot_hitrate()
+    plt.show()
+
+    display(Markdown("## Hitscore histogram"))
+    hitfinder.plot_hitscore_hist()
+    plt.show()
+
+    display(Markdown("## Hitscore plots"))
+    hitfinder.plot_hitscore()
+```

--- a/notebooks/AGIPD/CS_Characterization_unequalClockStep_Summary.ipynb
+++ b/notebooks/AGIPD/CS_Characterization_unequalClockStep_Summary.ipynb
 %% Cell type:markdown id: tags:

 # Current Source Characterisation Summary

 This notebook is used as a dependency notebook for current source characterisation to provide summary for all modules of the AGIPD.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "" # in this notebook, in_folder is not used as the data source is in the destination folder
 out_folder = "/gpfs/exfel/exp/SPB/202331/p900376/usr/CS/agipd12/ITESTC65_75_100_150/"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 proc_folder = "" # Path to corrected image data used to create histograms and validation plots
 raw_folder = '/gpfs/exfel/exp/SPB/202331/p900376/raw/'  # folder of raw data. This is used to save information of source data of generated constants, required
 dark_run = 264

 karabo_id_control = "SPB_IRU_AGIPD1M1"  # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
 karabo_id = "SPB_DET_AGIPD1M-1"
 ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information

 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045"
 db_output = False
 pic_format = 'jpeg'
 save_plots = False

 # Detector conditions
 bias_voltage = -1 # detector bias voltage, use -1 to use value stored in slow data.
 mem_cells = -1  # number of memory cells used, use -1 to use value stored in slow data.
 acq_rate = -1. # the detector acquisition rate, use -1. to use value stored in slow data.
 gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings

 import h5py
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
 import matplotlib.cm as cm
 import numpy as np

 import tabulate
 from datetime import timedelta
 from dateutil import parser
 from pathlib import Path
 # import os

 from cal_tools.agipdlib import AgipdCtrl
 from cal_tools.ana_tools import get_range
 from cal_tools.enums import BadPixels
 from cal_tools.tools import (
    module_index_to_qm,
    calcat_creation_time,
    get_report,
    get_pdu_from_db,
    send_to_db
 )
 from cal_tools.plotting import agipd_single_module_geometry

 from iCalibrationDB import Conditions, Constants

 from extra_data import RunDirectory
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry

 from IPython.display import Latex, display
 from XFELDetAna.plotting.simpleplot import simplePlot

 %matplotlib inline
 warnings.filterwarnings('ignore')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Get operation conditions
 ctrl_source = ctrl_source_template.format(karabo_id_control)
 run_folder = f'{raw_folder}/r{dark_run:04d}/'
 raw_dc = RunDirectory(run_folder)

 # Read operating conditions from AGIPD00 files
 instrument_src_mod = [
    s for s in list(raw_dc.all_sources) if "0CH" in s][0]
 ctrl_src = [
    s for s in list(raw_dc.all_sources) if ctrl_source in s][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Evaluate creation time
 creation_time = calcat_creation_time(raw_folder, dark_run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 agipd_cond = AgipdCtrl(
    run_dc=raw_dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without mosetting value
 )
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 if mem_cells is None:
    raise ValueError(f"No raw images found in {run_folder}")
 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)

 # Evaluate detector instance for mapping
 instance = karabo_id_control.split("_")[-1]
 agipd_instances = {'AGIPD65K1': [1, agipd_single_module_geometry()],
                   'AGIPD500K2G': [8, AGIPD_500K2GGeometry.from_origin()],
                   'AGIPD1M1': [16, AGIPD_1MGeometry.from_quad_positions(quad_pos=[(-525, 625),
                                                                                   (-550, -10),
                                                                                   (520, -160),
                                                                                   (542.5, 475),
                                                                                  ])]
                  }

 try:
    nmods = agipd_instances[instance][0]
    geom =  agipd_instances[instance][1]
 except KeyError as ke:
    print('The provided AGIPD instance is not recognised. Available instances are: \
    AGIPD65K1,\nAGIPD500K2G,\nAGIPD1M1')

 print(f"Using {creation_time} as creation time")
 print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
      f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}"
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Sanitised constants keys:
 # mH: high gain slope (0)
 # mM: medium gain slope (1)
 # mL: low gain slope (2)
 #
 # bH: high gain intercept (3)
 # bM: medium gain intercept (4)
 # bL: low gain intercept (5)
 #
 # H-M: ratio of high gain and medium gain slope (6)
 # M-L: ratio of medium gain and low gain slope (7)

 # Consts saved to DB as (8,mem_cells,128,512)
 # order: ['mH', 'mM', 'mL', 'bH', 'bM', 'bL']

 keys_fit = ['m', 'b']
 gs = ['H', 'M', 'L']
 keys_ratio = ['H-M', 'M-L']

 labels = {'m': 'Slope (m)',
          'b': 'Intercept (b)',
          'H-M': "High-to-Medium Ratio",
          'M-L': 'Medium-to-Low Ratio',
          'mask': 'Fraction of bad pixels over cells' }

 fit_data = {}
 ratios = {}
 BPmap = {}
 sanitised_const = {}

 modules = []
 karabo_da = []

 out_folder = Path(out_folder)
 constants_files = sorted(out_folder.glob('*.h5'),
                         key=lambda f: (len(f.stem), f.stem))
 for f in constants_files:
    mod = int(f.stem.split("_")[-1][1:])
    qm = module_index_to_qm(mod)
    ratios[mod] = {}
    fit_data[mod] = {}
    sanitised_const[mod] = {}

    with h5py.File(f, 'r') as hf:
        BPmap[mod] = hf['/BadPixels/data'][()].swapaxes(1,2)

        for key in keys_fit:
            fit_data[mod][key] = {}
            for g in range(0,3):
                # loop 1st through keys, then through gains
                # to have right order of constants for DB injection
                fit_data[mod][key][g] = hf[f'/SanitizedConsts/{g}/{key}/data'][()].swapaxes(1,2)
                sanitised_const[mod][f'{key}{gs[g]}'] = hf[f'/SanitizedConsts/{g}/{key}/data'][()]

        for key in keys_ratio:
            ratios[mod][key]= hf[f'/SanitizedConsts/Ratios/{key}/data'][()].swapaxes(1,2)
            sanitised_const[mod][key] = hf[f'/SanitizedConsts/Ratios/{key}/data'][()]

    modules.append(mod)
    karabo_da.append(f"AGIPD{mod:02d}")
    print(f'Data available for {qm}')

 # Change the order of dict keys to maintain compatibility for the rest of code
 fit_data = {mod: {g: {f: fit_data[mod][f][g] for f in keys_fit
                     }
                  for g in range(0,3)
                 }
            for mod in modules
           }
 ```

 %% Cell type:code id: tags:

 ``` python
 def slope_dict_to_arr(d):
    """Convert dictionary to numpy array."""

    arr = np.zeros((8,mem_cells,128,512), np.float32)
    for i, key in enumerate(d):
        arr[i,...] = d[key]
    arr = np.transpose(arr, (2, 3, 1, 0))

    return arr
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, raw_folder.strip('/').split('/')))[-2]
 file_loc = f'Proposal: {proposal}, Run: {dark_run}'

 report = get_report(metadata_folder)
 ```

 %% Cell type:markdown id: tags:

 ## Injection to DB

 %% Cell type:code id: tags:

 ``` python
 md = None

 # set the operating condition
 condition = Conditions.Dark.AGIPD(memory_cells=mem_cells,
                                  bias_voltage=bias_voltage,
                                  acquisition_rate=acq_rate,
                                  gain_setting=gain_setting
                                 )

 db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesCS(),
                             condition, cal_db_interface,
                             snapshot_at=creation_time)

 cond_dev = {"Memory cells": [352, 352],
            "Acquisition rate": [4.5, 4.5]
           }

 if db_output:
    for parm in condition.parameters:
        if parm.name in cond_dev:
            parm.lower_deviation = cond_dev[parm.name][0]
            parm.upper_deviation = cond_dev[parm.name][1]
            print(f'Condition for {parm.name} deviation updated to {cond_dev[parm.name]}.')

    for mod, pdu in zip(modules, db_modules):
        for const in ["SlopesCS", "BadPixelsCS"]:
            dbconst = getattr(Constants.AGIPD, const)()

            if const == "SlopesCS":
                dbconst.data = slope_dict_to_arr(sanitised_const[mod])
            else:
-                dbconst.data = BPmap[mod].swapaxes(1,2)
+                dbconst.data = BPmap[mod].swapaxes(0,2)

            md = send_to_db(pdu, karabo_id, dbconst, condition,
                            file_loc, report, cal_db_interface,
                            creation_time=creation_time)

    print("Constants parameter conditions are:\n")
    print(f"• memory_cells: {np.diff(cond_dev['Memory cells'])[0]}-{cond_dev['Memory cells'][0]}\n"
          f"• bias_voltage: {bias_voltage}\n"
          f"• acquisition_rate: {np.diff(cond_dev['Acquisition rate'])[0]}-{cond_dev['Acquisition rate'][0]}\n"
          f"• gain_setting: {gain_setting}\n"
          f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
 else:
    print('Injection to DB not requested.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create the arrays that will be used for figures.
 # Each array correponds to the data for all processed modules.

 pixel_range = [0,0,512,128]

 # const_data contains ratios of slopes and BP
 const_data = {}
 for key in keys_ratio:
    const_data[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
    for mod in modules:
        if key in ratios[mod]:
            const_data[key][mod,:,pixel_range[0]:pixel_range[2],
                               pixel_range[1]:pixel_range[3]] = ratios[mod][key]

 const_data['mask'] = np.full((nmods, mem_cells, 512, 128), np.nan)
 for mod in modules:
    const_data['mask'][mod,:,pixel_range[0]:pixel_range[2],
                    pixel_range[1]:pixel_range[3]] = BPmap[mod]

 # fit_const_data contains fitted slopes and intercepts for each gain stage
 fit_const_data = {}
 for g in range(0,3):
    fit_const_data[g] = {}
    for key in keys_fit:
        fit_const_data[g][key] = np.full((nmods, mem_cells, 512, 128), np.nan)
        for mod in modules:
            if key in fit_data[mod][g].keys():
                fit_const_data[g][key][mod,:,pixel_range[0]:pixel_range[2],
                               pixel_range[1]:pixel_range[3]] = fit_data[mod][g][key]
 ```

 %% Cell type:markdown id: tags:

 ## Summary across pixels ##

 %% Cell type:code id: tags:

 ``` python
 import matplotlib as mpl
 ```

 %% Cell type:code id: tags:

 ``` python
 mpl.rcParams['axes.labelsize'] = 14
 mpl.rcParams['font.size'] = 13
 ```

 %% Cell type:code id: tags:

 ``` python
 g_label = ['High gain', 'Medium gain', 'Low gain']
 for g in range(0,3):
    gs = gridspec.GridSpec(1, 2)
    fig = plt.figure(figsize=(14, 6), dpi=100)
    plt.suptitle(f'{g_label[g]}', fontsize=16)

    for i, key in enumerate(fit_const_data[g].keys()):
        data = np.nanmean(fit_const_data[g][key], axis=1)
        vmin, vmax = get_range(data, 5)
        ax = fig.add_subplot(gs[0, i])
        geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12,6),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1
                                         })
        colorbar = ax.images[0].colorbar
        ax.set_title(labels[key])
        ax.set_xlabel('Columns')
        ax.set_ylabel('Rows')

    plt.show()
    if save_plots:
        fig.savefig(f'{out_folder}/{g_label[g].split(" ", 1)[0]}_gain_fit_maps.{pic_format}',
                   bbox_inches='tight', dpi=300)

 ```

 %% Cell type:code id: tags:

 ``` python
 gs = gridspec.GridSpec(2, 2)
 fig = plt.figure(figsize=(14, 14), tight_layout=True, dpi=100)

 for i, key in enumerate(const_data.keys()):
    if key=='mask':
        ax = fig.add_subplot(gs[0, :])
        data = np.nanmean(const_data[key]>0, axis=1)
        vmin, vmax = (0,1)

    else:
        ax = fig.add_subplot(gs[1, i])
        data = np.nanmean(const_data[key], axis=1)
        vmin, vmax = get_range(data, 5)

    geom.plot_data_fast(data,
                            vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12,6),
                            colorbar={'shrink': 1,
                                      'pad': 0.04,
                                      'fraction': 0.1
                                     })
    colorbar = ax.images[0].colorbar
    ax.set_title(labels[key])
    ax.set_xlabel('Columns')
    ax.set_ylabel('Rows')

    if save_plots:
        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        fig.savefig(f'{out_folder}/{list(labels)[i+2]}.{pic_format}', bbox_inches=extent.expanded(1.4, 1.2),
                   dpi=300)

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Summary across cells ##

 Good pixels only.

 %% Cell type:code id: tags:

 ``` python
 for k, key in enumerate(const_data.keys()):
    data = np.copy(const_data[key])
    if key=='mask':
        data = data>0
    else:
        data[const_data['mask']>0] = np.nan

    d = []
    for i in range(nmods):
        d.append({'x': np.arange(data[i].shape[0]),
                  'y': np.nanmean(data[i], axis=(1,2)),
                  'drawstyle': 'steps-pre',
                  'label': f'{i}',
                  'linewidth': 2,
                  'linestyle': '--' if i>7 else '-'
                  })

    fig = plt.figure(figsize=(11, 5), dpi=100)
    ax = fig.add_subplot(111)

    _ = simplePlot(d, xrange=(-12, 510),
                        x_label='Memory Cell ID',
                        y_label=labels[key],
                        use_axis=ax,
                        legend='top-left-frame-ncol8',)
    ylim = ax.get_ylim()
    ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
    ax.grid()
    plt.show()
    if save_plots:
        fig.savefig(f'{out_folder}/CellDependency_{list(labels)[k+2]}.{pic_format}',
                   bbox_inches='tight', dpi=300)
 ```

 %% Cell type:code id: tags:

 ``` python
 table = []
 for mod in modules:

    table.append((mod,
                  f"{np.nanmean(ratios[mod]['H-M']):0.1f} +- {np.nanstd(ratios[mod]['H-M']):0.2f}",
                  f"{np.nanmean(ratios[mod]['M-L']):0.1f} +- {np.nanstd(ratios[mod]['M-L']):0.2f}",
                  f"{np.nanmean(BPmap[mod]>0)*100:0.1f} ({np.nansum(BPmap[mod]>0)})"
                        ))

 all_HM = []
 all_ML = []
 for mod in modules:
    all_HM.extend(ratios[mod]['H-M'])
    all_ML.extend(ratios[mod]['M-L'])

 all_HM = np.array(all_HM)
 all_ML = np.array(all_ML)

 all_MSK = np.array([list(msk) for msk in BPmap.values()])

 table.append(('overall',
              f"{np.nanmean(all_HM):0.1f} +- {np.nanstd(all_HM):0.2f}",
              f"{np.nanmean(all_ML):0.1f} +- {np.nanstd(all_ML):0.2f}",
              f"{np.nanmean(all_MSK>0)*100:0.1f} ({np.nansum(all_MSK>0)})"
                    ))

 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Module", "Ratio High-Medium", "Ratio Medium-Low","Bad pixels [%(Count)]"])))
 ```

 %% Cell type:markdown id: tags:

 ## Summary gain ratios (good pixels only) + histograms

 %% Cell type:code id: tags:

 ``` python
 names = ['HM_masked', 'ML_masked', 'hist_HM_masked', 'hist_ML_masked']

 gs = gridspec.GridSpec(2, 2)
 fig = plt.figure(figsize=(15, 15), dpi=100)

 colors = cm.rainbow(np.linspace(0, 1, nmods))
 k = 0
 for i, key in enumerate(const_data.keys()):
    if key != 'mask':
        const_data[key][const_data['mask']>0] = np.nan
        data = np.nanmean(const_data[key], axis=1)
        vmin, vmax = get_range(data, 5)
        ax = fig.add_subplot(gs[0, i])
        geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(13,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1
                                         })
        colorbar = ax.images[0].colorbar
        ax.set_title(labels[key])
        ax.set_xlabel('Columns')
        ax.set_ylabel('Rows')

        if save_plots:
            extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
            fig.savefig(f'{out_folder}/{names[k]}.{pic_format}', bbox_inches=extent.expanded(1.4, 1.2),
                   dpi=300)
        k += 1

        ax = fig.add_subplot(gs[1,i])
        for nmod in range(const_data[key].shape[0]):
            h, e = np.histogram(const_data[key][nmod,...].flatten(), bins=100, range=(vmin, vmax))
            ax.plot(e[:-1], h, color=colors[nmod],linewidth=2, label=f'{nmod}', alpha=0.8)
            ax.set_xlabel(f'{labels[key]}')
            ax.set_ylabel('Counts')
        ax.grid()
        ax.legend()

        if save_plots:
            extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
            fig.savefig(f'{out_folder}/{names[k]}.{pic_format}', bbox_inches=extent.expanded(1.4, 1.2),
                   dpi=300)
        k += 1

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 # Current Source Characterisation Summary

 This notebook is used as a dependency notebook for current source characterisation to provide summary for all modules of the AGIPD.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "" # in this notebook, in_folder is not used as the data source is in the destination folder
 out_folder = "/gpfs/exfel/exp/SPB/202331/p900376/usr/CS/agipd12/ITESTC65_75_100_150/"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 proc_folder = "" # Path to corrected image data used to create histograms and validation plots
 raw_folder = '/gpfs/exfel/exp/SPB/202331/p900376/raw/'  # folder of raw data. This is used to save information of source data of generated constants, required
 dark_run = 264

 karabo_id_control = "SPB_IRU_AGIPD1M1"  # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
 karabo_id = "SPB_DET_AGIPD1M-1"
 ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information

 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045"
 db_output = False
 pic_format = 'jpeg'
 save_plots = False

 # Detector conditions
 bias_voltage = -1 # detector bias voltage, use -1 to use value stored in slow data.
 mem_cells = -1  # number of memory cells used, use -1 to use value stored in slow data.
 acq_rate = -1. # the detector acquisition rate, use -1. to use value stored in slow data.
 gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings

 import h5py
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
 import matplotlib.cm as cm
 import numpy as np

 import tabulate
 from datetime import timedelta
 from dateutil import parser
 from pathlib import Path
 # import os

 from cal_tools.agipdlib import AgipdCtrl
 from cal_tools.ana_tools import get_range
 from cal_tools.enums import BadPixels
 from cal_tools.tools import (
    module_index_to_qm,
    calcat_creation_time,
    get_report,
    get_pdu_from_db,
    send_to_db
 )
 from cal_tools.plotting import agipd_single_module_geometry

 from iCalibrationDB import Conditions, Constants

 from extra_data import RunDirectory
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry

 from IPython.display import Latex, display
 from XFELDetAna.plotting.simpleplot import simplePlot

 %matplotlib inline
 warnings.filterwarnings('ignore')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Get operation conditions
 ctrl_source = ctrl_source_template.format(karabo_id_control)
 run_folder = f'{raw_folder}/r{dark_run:04d}/'
 raw_dc = RunDirectory(run_folder)

 # Read operating conditions from AGIPD00 files
 instrument_src_mod = [
    s for s in list(raw_dc.all_sources) if "0CH" in s][0]
 ctrl_src = [
    s for s in list(raw_dc.all_sources) if ctrl_source in s][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Evaluate creation time
 creation_time = calcat_creation_time(raw_folder, dark_run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 agipd_cond = AgipdCtrl(
    run_dc=raw_dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without mosetting value
 )
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 if mem_cells is None:
    raise ValueError(f"No raw images found in {run_folder}")
 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)

 # Evaluate detector instance for mapping
 instance = karabo_id_control.split("_")[-1]
 agipd_instances = {'AGIPD65K1': [1, agipd_single_module_geometry()],
                   'AGIPD500K2G': [8, AGIPD_500K2GGeometry.from_origin()],
                   'AGIPD1M1': [16, AGIPD_1MGeometry.from_quad_positions(quad_pos=[(-525, 625),
                                                                                   (-550, -10),
                                                                                   (520, -160),
                                                                                   (542.5, 475),
                                                                                  ])]
                  }

 try:
    nmods = agipd_instances[instance][0]
    geom =  agipd_instances[instance][1]
 except KeyError as ke:
    print('The provided AGIPD instance is not recognised. Available instances are: \
    AGIPD65K1,\nAGIPD500K2G,\nAGIPD1M1')

 print(f"Using {creation_time} as creation time")
 print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
      f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}"
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Sanitised constants keys:
 # mH: high gain slope (0)
 # mM: medium gain slope (1)
 # mL: low gain slope (2)
 #
 # bH: high gain intercept (3)
 # bM: medium gain intercept (4)
 # bL: low gain intercept (5)
 #
 # H-M: ratio of high gain and medium gain slope (6)
 # M-L: ratio of medium gain and low gain slope (7)

 # Consts saved to DB as (8,mem_cells,128,512)
 # order: ['mH', 'mM', 'mL', 'bH', 'bM', 'bL']

 keys_fit = ['m', 'b']
 gs = ['H', 'M', 'L']
 keys_ratio = ['H-M', 'M-L']

 labels = {'m': 'Slope (m)',
          'b': 'Intercept (b)',
          'H-M': "High-to-Medium Ratio",
          'M-L': 'Medium-to-Low Ratio',
          'mask': 'Fraction of bad pixels over cells' }

 fit_data = {}
 ratios = {}
 BPmap = {}
 sanitised_const = {}

 modules = []
 karabo_da = []

 out_folder = Path(out_folder)
 constants_files = sorted(out_folder.glob('*.h5'),
                         key=lambda f: (len(f.stem), f.stem))
 for f in constants_files:
    mod = int(f.stem.split("_")[-1][1:])
    qm = module_index_to_qm(mod)
    ratios[mod] = {}
    fit_data[mod] = {}
    sanitised_const[mod] = {}

    with h5py.File(f, 'r') as hf:
        BPmap[mod] = hf['/BadPixels/data'][()].swapaxes(1,2)

        for key in keys_fit:
            fit_data[mod][key] = {}
            for g in range(0,3):
                # loop 1st through keys, then through gains
                # to have right order of constants for DB injection
                fit_data[mod][key][g] = hf[f'/SanitizedConsts/{g}/{key}/data'][()].swapaxes(1,2)
                sanitised_const[mod][f'{key}{gs[g]}'] = hf[f'/SanitizedConsts/{g}/{key}/data'][()]

        for key in keys_ratio:
            ratios[mod][key]= hf[f'/SanitizedConsts/Ratios/{key}/data'][()].swapaxes(1,2)
            sanitised_const[mod][key] = hf[f'/SanitizedConsts/Ratios/{key}/data'][()]

    modules.append(mod)
    karabo_da.append(f"AGIPD{mod:02d}")
    print(f'Data available for {qm}')

 # Change the order of dict keys to maintain compatibility for the rest of code
 fit_data = {mod: {g: {f: fit_data[mod][f][g] for f in keys_fit
                     }
                  for g in range(0,3)
                 }
            for mod in modules
           }
 ```

 %% Cell type:code id: tags:

 ``` python
 def slope_dict_to_arr(d):
    """Convert dictionary to numpy array."""

    arr = np.zeros((8,mem_cells,128,512), np.float32)
    for i, key in enumerate(d):
        arr[i,...] = d[key]
    arr = np.transpose(arr, (2, 3, 1, 0))

    return arr
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, raw_folder.strip('/').split('/')))[-2]
 file_loc = f'Proposal: {proposal}, Run: {dark_run}'

 report = get_report(metadata_folder)
 ```

 %% Cell type:markdown id: tags:

 ## Injection to DB

 %% Cell type:code id: tags:

 ``` python
 md = None

 # set the operating condition
 condition = Conditions.Dark.AGIPD(memory_cells=mem_cells,
                                  bias_voltage=bias_voltage,
                                  acquisition_rate=acq_rate,
                                  gain_setting=gain_setting
                                 )

 db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesCS(),
                             condition, cal_db_interface,
                             snapshot_at=creation_time)

 cond_dev = {"Memory cells": [352, 352],
            "Acquisition rate": [4.5, 4.5]
           }

 if db_output:
    for parm in condition.parameters:
        if parm.name in cond_dev:
            parm.lower_deviation = cond_dev[parm.name][0]
            parm.upper_deviation = cond_dev[parm.name][1]
            print(f'Condition for {parm.name} deviation updated to {cond_dev[parm.name]}.')

    for mod, pdu in zip(modules, db_modules):
        for const in ["SlopesCS", "BadPixelsCS"]:
            dbconst = getattr(Constants.AGIPD, const)()

            if const == "SlopesCS":
                dbconst.data = slope_dict_to_arr(sanitised_const[mod])
            else:
-                dbconst.data = BPmap[mod].swapaxes(1,2)
+                dbconst.data = BPmap[mod].swapaxes(0,2)

            md = send_to_db(pdu, karabo_id, dbconst, condition,
                            file_loc, report, cal_db_interface,
                            creation_time=creation_time)

    print("Constants parameter conditions are:\n")
    print(f"• memory_cells: {np.diff(cond_dev['Memory cells'])[0]}-{cond_dev['Memory cells'][0]}\n"
          f"• bias_voltage: {bias_voltage}\n"
          f"• acquisition_rate: {np.diff(cond_dev['Acquisition rate'])[0]}-{cond_dev['Acquisition rate'][0]}\n"
          f"• gain_setting: {gain_setting}\n"
          f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
 else:
    print('Injection to DB not requested.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create the arrays that will be used for figures.
 # Each array correponds to the data for all processed modules.

 pixel_range = [0,0,512,128]

 # const_data contains ratios of slopes and BP
 const_data = {}
 for key in keys_ratio:
    const_data[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
    for mod in modules:
        if key in ratios[mod]:
            const_data[key][mod,:,pixel_range[0]:pixel_range[2],
                               pixel_range[1]:pixel_range[3]] = ratios[mod][key]

 const_data['mask'] = np.full((nmods, mem_cells, 512, 128), np.nan)
 for mod in modules:
    const_data['mask'][mod,:,pixel_range[0]:pixel_range[2],
                    pixel_range[1]:pixel_range[3]] = BPmap[mod]

 # fit_const_data contains fitted slopes and intercepts for each gain stage
 fit_const_data = {}
 for g in range(0,3):
    fit_const_data[g] = {}
    for key in keys_fit:
        fit_const_data[g][key] = np.full((nmods, mem_cells, 512, 128), np.nan)
        for mod in modules:
            if key in fit_data[mod][g].keys():
                fit_const_data[g][key][mod,:,pixel_range[0]:pixel_range[2],
                               pixel_range[1]:pixel_range[3]] = fit_data[mod][g][key]
 ```

 %% Cell type:markdown id: tags:

 ## Summary across pixels ##

 %% Cell type:code id: tags:

 ``` python
 import matplotlib as mpl
 ```

 %% Cell type:code id: tags:

 ``` python
 mpl.rcParams['axes.labelsize'] = 14
 mpl.rcParams['font.size'] = 13
 ```

 %% Cell type:code id: tags:

 ``` python
 g_label = ['High gain', 'Medium gain', 'Low gain']
 for g in range(0,3):
    gs = gridspec.GridSpec(1, 2)
    fig = plt.figure(figsize=(14, 6), dpi=100)
    plt.suptitle(f'{g_label[g]}', fontsize=16)

    for i, key in enumerate(fit_const_data[g].keys()):
        data = np.nanmean(fit_const_data[g][key], axis=1)
        vmin, vmax = get_range(data, 5)
        ax = fig.add_subplot(gs[0, i])
        geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12,6),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1
                                         })
        colorbar = ax.images[0].colorbar
        ax.set_title(labels[key])
        ax.set_xlabel('Columns')
        ax.set_ylabel('Rows')

    plt.show()
    if save_plots:
        fig.savefig(f'{out_folder}/{g_label[g].split(" ", 1)[0]}_gain_fit_maps.{pic_format}',
                   bbox_inches='tight', dpi=300)

 ```

 %% Cell type:code id: tags:

 ``` python
 gs = gridspec.GridSpec(2, 2)
 fig = plt.figure(figsize=(14, 14), tight_layout=True, dpi=100)

 for i, key in enumerate(const_data.keys()):
    if key=='mask':
        ax = fig.add_subplot(gs[0, :])
        data = np.nanmean(const_data[key]>0, axis=1)
        vmin, vmax = (0,1)

    else:
        ax = fig.add_subplot(gs[1, i])
        data = np.nanmean(const_data[key], axis=1)
        vmin, vmax = get_range(data, 5)

    geom.plot_data_fast(data,
                            vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12,6),
                            colorbar={'shrink': 1,
                                      'pad': 0.04,
                                      'fraction': 0.1
                                     })
    colorbar = ax.images[0].colorbar
    ax.set_title(labels[key])
    ax.set_xlabel('Columns')
    ax.set_ylabel('Rows')

    if save_plots:
        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        fig.savefig(f'{out_folder}/{list(labels)[i+2]}.{pic_format}', bbox_inches=extent.expanded(1.4, 1.2),
                   dpi=300)

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Summary across cells ##

 Good pixels only.

 %% Cell type:code id: tags:

 ``` python
 for k, key in enumerate(const_data.keys()):
    data = np.copy(const_data[key])
    if key=='mask':
        data = data>0
    else:
        data[const_data['mask']>0] = np.nan

    d = []
    for i in range(nmods):
        d.append({'x': np.arange(data[i].shape[0]),
                  'y': np.nanmean(data[i], axis=(1,2)),
                  'drawstyle': 'steps-pre',
                  'label': f'{i}',
                  'linewidth': 2,
                  'linestyle': '--' if i>7 else '-'
                  })

    fig = plt.figure(figsize=(11, 5), dpi=100)
    ax = fig.add_subplot(111)

    _ = simplePlot(d, xrange=(-12, 510),
                        x_label='Memory Cell ID',
                        y_label=labels[key],
                        use_axis=ax,
                        legend='top-left-frame-ncol8',)
    ylim = ax.get_ylim()
    ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
    ax.grid()
    plt.show()
    if save_plots:
        fig.savefig(f'{out_folder}/CellDependency_{list(labels)[k+2]}.{pic_format}',
                   bbox_inches='tight', dpi=300)
 ```

 %% Cell type:code id: tags:

 ``` python
 table = []
 for mod in modules:

    table.append((mod,
                  f"{np.nanmean(ratios[mod]['H-M']):0.1f} +- {np.nanstd(ratios[mod]['H-M']):0.2f}",
                  f"{np.nanmean(ratios[mod]['M-L']):0.1f} +- {np.nanstd(ratios[mod]['M-L']):0.2f}",
                  f"{np.nanmean(BPmap[mod]>0)*100:0.1f} ({np.nansum(BPmap[mod]>0)})"
                        ))

 all_HM = []
 all_ML = []
 for mod in modules:
    all_HM.extend(ratios[mod]['H-M'])
    all_ML.extend(ratios[mod]['M-L'])

 all_HM = np.array(all_HM)
 all_ML = np.array(all_ML)

 all_MSK = np.array([list(msk) for msk in BPmap.values()])

 table.append(('overall',
              f"{np.nanmean(all_HM):0.1f} +- {np.nanstd(all_HM):0.2f}",
              f"{np.nanmean(all_ML):0.1f} +- {np.nanstd(all_ML):0.2f}",
              f"{np.nanmean(all_MSK>0)*100:0.1f} ({np.nansum(all_MSK>0)})"
                    ))

 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Module", "Ratio High-Medium", "Ratio Medium-Low","Bad pixels [%(Count)]"])))
 ```

 %% Cell type:markdown id: tags:

 ## Summary gain ratios (good pixels only) + histograms

 %% Cell type:code id: tags:

 ``` python
 names = ['HM_masked', 'ML_masked', 'hist_HM_masked', 'hist_ML_masked']

 gs = gridspec.GridSpec(2, 2)
 fig = plt.figure(figsize=(15, 15), dpi=100)

 colors = cm.rainbow(np.linspace(0, 1, nmods))
 k = 0
 for i, key in enumerate(const_data.keys()):
    if key != 'mask':
        const_data[key][const_data['mask']>0] = np.nan
        data = np.nanmean(const_data[key], axis=1)
        vmin, vmax = get_range(data, 5)
        ax = fig.add_subplot(gs[0, i])
        geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(13,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1
                                         })
        colorbar = ax.images[0].colorbar
        ax.set_title(labels[key])
        ax.set_xlabel('Columns')
        ax.set_ylabel('Rows')

        if save_plots:
            extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
            fig.savefig(f'{out_folder}/{names[k]}.{pic_format}', bbox_inches=extent.expanded(1.4, 1.2),
                   dpi=300)
        k += 1

        ax = fig.add_subplot(gs[1,i])
        for nmod in range(const_data[key].shape[0]):
            h, e = np.histogram(const_data[key][nmod,...].flatten(), bins=100, range=(vmin, vmax))
            ax.plot(e[:-1], h, color=colors[nmod],linewidth=2, label=f'{nmod}', alpha=0.8)
            ax.set_xlabel(f'{labels[key]}')
            ax.set_ylabel('Counts')
        ax.grid()
        ax.legend()

        if save_plots:
            extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
            fig.savefig(f'{out_folder}/{names[k]}.{pic_format}', bbox_inches=extent.expanded(1.4, 1.2),
                   dpi=300)
        k += 1

 plt.show()
 ```

--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
 %% Cell type:markdown id: tags:

 # AGIPD Characterize Dark Images #

 Author: European XFEL Detector Group, Version: 2.0

 The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets , noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.

 The evaluated calibration constants are stored locally and injected in the calibration data base.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
 out_folder = "" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 modules = [-1]  # list of modules to evaluate, RANGE ALLOWED
 run_high = 9985 # run number in which high gain data was recorded, required
 run_med = 9984 # run number in which medium gain data was recorded, required
 run_low = 9983 # run number in which low gain data was recorded, required
 operation_mode = "ADAPTIVE_GAIN"  # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")

 karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '

 use_dir_creation_date = True  # use dir creation date as data production reference date
 cal_db_interface = "tcp://max-exfl-cal001:8020" # the database interface to use
 cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
 sort_runs = True  # Sort the selected dark runs. This flag is added for old data (e.g. 900174 r0011).

 mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode, use -1 to use value stored in slow data.
 integration_time = -1 # integration time, negative values for auto-detection.
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017

 thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
 thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_offset_hard_hg = [3000, 7000]  # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_mg = [6000, 10000]  # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_lg = [6000, 10000]  # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_hg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_hg, but for fixed gain operation
 thresholds_offset_hard_mg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_mg, but for fixed gain operation
 thresholds_offset_hard_lg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_lg, but for fixed gain operation

 thresholds_noise_sigma = 5. # noise sigma thresholds for offset deduced bad pixels
 thresholds_noise_hard = [0, 0] # For setting the same threshold noise for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_gain_sigma = 5.  # Gain separation sigma threshold
 max_trains = 550  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarily avoid memory issues.
 min_trains = 1  # Minimum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells

 # This is used if modules is not specified:
 def find_modules(in_folder, run_high, modules):
    if (modules is not None) and modules != [-1]:
        return modules
    from pathlib import Path
    import re
    modules = set()
    for file in Path(in_folder, f'r{run_high:04d}').iterdir():
        m = re.search(r'-AGIPD(\d{2})-', file.name)
        if m:
            modules.add(int(m.group(1)))
    return sorted(modules)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import multiprocessing
 import os
 from collections import OrderedDict
 from datetime import timedelta
 from pathlib import Path
 from typing import Tuple

 import matplotlib
 import numpy as np
 import pasha as psh
 import tabulate
 import yaml
 from IPython.display import Latex, Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')

 import iCalibrationDB
 import matplotlib.pyplot as plt
 from cal_tools import step_timing
 from cal_tools.agipdlib import AgipdCtrlRuns
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import (
    create_constant_overview,
    plot_badpix_3d,
    show_overview,
    show_processed_modules,
 )
 from cal_tools.tools import (
    get_dir_creation_date,
    get_from_db,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    module_index_to_qm,
    run_prop_seq_from_path,
    save_const_to_h5,
    send_to_db,
 )

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # insert control device if format string (does nothing otherwise)
 ctrl_src = ctrl_source_template.format(karabo_id_control)

 run_numbers = [run_high, run_med, run_low]

 run, prop, seq = run_prop_seq_from_path(in_folder)

 report = get_report(metadata_folder)
 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 instrument = karabo_id.split("_")[0]

 if "AGIPD1M" in karabo_id:
    nmods = 16
 elif "AGIPD500K" in karabo_id:
    nmods = 8
 else:
    nmods = 1

 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 def create_karabo_da_list(modules):
    return(["AGIPD{:02d}".format(i) for i in modules])

 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    mod_indices = modules if nmods > 1 else [0]
    karabo_da = create_karabo_da_list(modules)
 else:
    # TODO: use CALCAT metadata for detector's modules indices.
    modules = [int(x[-2:]) for x in karabo_da]
    mod_indices = modules if nmods > 1 else [0]

 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")

 step_timer = step_timing.StepTimer()
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 # Create out_folder if it doesn't exist.
 Path(out_folder).mkdir(parents=True, exist_ok=True)

 mod_image_size = []
 for run in run_numbers:
    missing_modules = []  # modules with no images within a run.
    n_trains_list = []   # list of the number of trains for each module within a run.
    # This is important in case of no slurm parallelization over modules is done.
    # (e.g. running notebook interactively)
    for m in modules:
        # validate that there are trains for the selected modules and run.
        dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(
            instrument_src.format(m), "*", require_all=True)
        n_trains = len(dc.train_ids)

        if n_trains == 0:
            print(f"WARNING: No images for module AGIPD{m:02d}, run {run}.")
            missing_modules.append(m)
        # Raise a warning if the module has less trains than expected.
        elif n_trains < min_trains:
            print(f"WARNING: AGIPD{m:02d}, run {run} "
                  f"has trains less than minimum trains: {min_trains}.")
        else:
            print(f"Processing {max_trains if max_trains < n_trains else n_trains} "
                  f"for AGIPD{m:02d}, run {run} ")

        n_trains_list.append(n_trains)
        mod_image_size.append(np.product(dc[instrument_src.format(m), "image.data"].shape) * 2  / 1e9)

    if max(n_trains_list) == 0:
        raise ValueError(f"No images to process for run: {run}")
    elif max(n_trains_list) < min_trains:
        raise ValueError(f"{run} has less than minimum trains: {min_trains}")

 # Update modules and karabo_da lists based on available modules to processes.
 modules = [m for m in modules if m not in missing_modules]
 karabo_da = create_karabo_da_list(modules)
 print(f"Will process data of ({sum(mod_image_size):.02f} GB).")
 step_timer.done_step("Checking the data size and availability.")
 ```

 %% Cell type:markdown id: tags:

 ## Read and validate the runs control data.

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 # Read slow data from 1st channel of the selected module only.
 # Currently dark slurm jobs run per one module.
 first_mod_channel = sorted(modules)[0]

 instrument_src_mod = instrument_src.format(first_mod_channel)

 agipd_ctrl_dark = AgipdCtrlRuns(
    raw_folder=in_folder,
    runs=run_numbers,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    sort_dark_runs_enabled=sort_runs
 )
 # Update run_numbers list in case it was sorted.
 run_numbers = agipd_ctrl_dark.runs

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_numbers[0])

 print(f"Using {creation_time} as creation time of constant.")

 if mem_cells == 0:
    mem_cells = agipd_ctrl_dark.get_memory_cells()

 if acq_rate == 0:
    acq_rate = agipd_ctrl_dark.get_acq_rate()

 if bias_voltage == 0:
    bias_voltage = agipd_ctrl_dark.get_bias_voltage(
        karabo_id_control, module=first_mod_channel)

 fixed_gain_mode = False
 if gain_mode == -1:
    gain_mode = agipd_ctrl_dark.gain_modes
    fixed_gain_mode = agipd_ctrl_dark.fixed_gain_mode()

 if gain_setting == -1:
    gain_setting = agipd_ctrl_dark.get_gain_setting()

 if integration_time == -1:
    integration_time = agipd_ctrl_dark.get_integration_time()
 step_timer.done_step(f"Read operating conditions.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Determine the gain operation mode based on the gain_mode stored in control h5file.
 if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
    print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")

 if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
    print(
        "WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but"
        "slow data indicates FIXED_GAIN. Processing fixed gain constants.")
 elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
    print(
        "WARNING: Operation_mode parameter is FIXED_GAIN, "
        "slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Parameters are:")
 print(f"Proposal: {prop}")
 print(f"Acquisition rate: {acq_rate}")
 print(f"Memory cells: {mem_cells}")
 print(f"Runs: {run_numbers}")
 print(f"Interlaced mode: {interlaced}")
 print(f"Using DB: {db_output}")
 print(f"Input: {in_folder}")
 print(f"Output: {out_folder}")
 print(f"Bias voltage: {bias_voltage}V")
 print(f"Gain setting: {gain_setting}")
 print(f"Integration time: {integration_time}")
 print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
 ```

 %% Cell type:code id: tags:

 ``` python
 if thresholds_offset_hard != [0, 0]:
    # if set, this will override the individual parameters
    thresholds_offset_hard = [thresholds_offset_hard] * 3
 elif fixed_gain_mode:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg_fixed,
        thresholds_offset_hard_mg_fixed,
        thresholds_offset_hard_lg_fixed,
    ]
 else:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg,
        thresholds_offset_hard_mg,
        thresholds_offset_hard_lg,
    ]
 print("Will use the following hard offset thresholds")
 for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
    print(f"- {name} gain: {value}")

 if thresholds_noise_hard != [0, 0]:
    thresholds_noise_hard = [thresholds_noise_hard] * 3
 else:
    thresholds_noise_hard = [
        thresholds_noise_hard_hg,
        thresholds_noise_hard_mg,
        thresholds_noise_hard_lg,
    ]
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 parallel_num_procs = min(6, len(modules)*3)
 parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
 print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")

 def characterize_module(
    mod_index: int, channel: int, gain_run: Tuple[int, int],
 ) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:

    gain_index, run = gain_run
    # Select the corresponding module channel.
    instrument_src_mod = instrument_src.format(channel)

    run_dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(instrument_src_mod, require_all=True)
    if max_trains != 0:
        run_dc = run_dc.select_trains(np.s_[:max_trains])

    # Read module's image and cellId data.
    im = run_dc[instrument_src_mod, "image.data"].ndarray()
    cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())

    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]

    if interlaced:
        if not fixed_gain_mode:
            ga = im[1::2, 0, ...]
        im = im[0::2, 0, ...].astype(np.float32)
        cell_ids = cell_ids[::2]
    else:
        if not fixed_gain_mode:
            ga = im[:, 1, ...]
        im = im[:, 0, ...].astype(np.float32)
    im = np.transpose(im)
    if not fixed_gain_mode:
        ga = np.transpose(ga)

    context = psh.context.ThreadContext(num_workers=parallel_num_threads)
    offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
    noise = context.alloc(like=offset)

    if fixed_gain_mode:
        gains = None
        gains_std = None
    else:
        gains = context.alloc(like=offset)
        gains_std = context.alloc(like=offset)

    def process_cell(worker_id, array_index, cell_number):
        cell_slice_index = (cell_ids == cell_number)
        im_slice = im[..., cell_slice_index]
        offset[..., cell_number] = np.median(im_slice, axis=2)
        noise[..., cell_number] = np.std(im_slice, axis=2)
        if not fixed_gain_mode:
            ga_slice = ga[..., cell_slice_index]
            gains[..., cell_number] = np.median(ga_slice, axis=2)
            gains_std[..., cell_number] = np.std(ga_slice, axis=2)
    unique_cell_ids = np.unique(cell_ids)

    # We assume cells are accepted starting 0.
    if np.any(unique_cell_ids > mem_cells):
        raise ValueError(
            f"Invalid cells found {unique_cell_ids} "
            f"for run: {run}.")

    context.map(process_cell, unique_cell_ids)

    # bad pixels
    bp = np.zeros_like(offset, dtype=np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[(offset < local_thresholds_offset_hard[0]) |
       (offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))
    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    return mod_index, gain_index, offset, noise, gains, gains_std, bp
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 with multiprocessing.Pool(processes=parallel_num_procs) as pool:
    results = pool.starmap(
        characterize_module, itertools.product(mod_indices, modules, list(enumerate(run_numbers))))

 step_timer.done_step("Processing dark from the 3 runs.")

 # mapped values for processing 2 modules example:
 # [(0, (0, 9013))
 #     0, (0, run-high),
 #     0, (1, run-med),
 #     0, (2, run-low),
 #     1, (0, run-high),
 #     1, (1, run-med),
 #     1, (2, run-low),,
 # ]
 ```

 %% Cell type:code id: tags:

 ``` python
 offset_g = OrderedDict()
 noise_g = OrderedDict()
 badpix_g = OrderedDict()
 if not fixed_gain_mode:
    gain_g = OrderedDict()
    gainstd_g = OrderedDict()


 for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
    qm = module_index_to_qm(module_index)
    if qm not in offset_g:
        offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
        noise_g[qm] = np.zeros_like(offset_g[qm])
        badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
        if not fixed_gain_mode:
            gain_g[qm] = np.zeros_like(offset_g[qm])
            gainstd_g[qm] = np.zeros_like(offset_g[qm])

    offset_g[qm][..., gain_index] = offset
    noise_g[qm][..., gain_index] = noise
    badpix_g[qm][..., gain_index] = bp
    if not fixed_gain_mode:
        gain_g[qm][..., gain_index] = gains
        gainstd_g[qm][..., gain_index] = gains_std
 ```

 %% Cell type:code id: tags:

 ``` python
 # Add bad pixels due to bad gain separation
 if not fixed_gain_mode:
    for qm in gain_g.keys():
        for g in range(2):
            # Bad pixels during bad gain separation.
            # Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
            bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
                np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
            badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
                BadPixels.GAIN_THRESHOLDING_ERROR
 ```

 %% Cell type:markdown id: tags:

 The thresholds for gain switching are then defined as the mean value between in individual gain bit levels. Note that these thresholds need to be refined with charge induced thresholds, as the two are not the same.

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    thresholds_g = {}
    for qm in gain_g.keys():
        thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
        thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
        thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
        for i in range(3):
            thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in mod_indices:
    qm = module_index_to_qm(i)
    res[qm] = {
        'Offset': offset_g[qm],
        'Noise': noise_g[qm],
        'BadPixelsDark': badpix_g[qm]
    }
    if not fixed_gain_mode:
        res[qm]['ThresholdsDark'] = thresholds_g[qm]
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 # note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
 condition = iCalibrationDB.Conditions.Dark.AGIPD(
    memory_cells=mem_cells,
    bias_voltage=bias_voltage,
    acquisition_rate=acq_rate,
    gain_setting=gain_setting,
    gain_mode=fixed_gain_mode,
    integration_time=integration_time
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
 qm_dict = OrderedDict()
 all_pdus = get_pdu_from_db(
    karabo_id,
    karabo_da,
    constant=iCalibrationDB.CalibrationConstant(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time.isoformat() if creation_time else None,
    timeout=cal_db_timeout
 )
 for module_index, module_da, module_pdu in zip(mod_indices, karabo_da, all_pdus):
    qm = module_index_to_qm(module_index)
    qm_dict[qm] = {
        "karabo_da": module_da,
        "db_module": module_pdu
    }
 ```

 %% Cell type:markdown id: tags:

 ## Sending calibration constants to the database.

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 md = None
 # Location of source data, injected with the constants
 file_loc = f"proposal:{prop} runs:{' '.join([str(r) for r in reversed(run_numbers)])}"

 for qm in res:
    db_module = qm_dict[qm]["db_module"]
    for const in res[qm]:
        dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
        dconst.data = res[qm][const]

        if db_output:
            md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
                            report, cal_db_interface, creation_time=creation_time,
                            timeout=cal_db_timeout)

        if local_output:
            md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
                                  file_loc, report, creation_time, out_folder)
            print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")

 print("Constants parameter conditions are:\n")
 print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
      f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
      f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
      f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")\

 step_timer.done_step("Inject calibration constants to the database.")
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving previous calibration constants for comparison.

 %% Cell type:code id: tags:

 ``` python
 # Start retrieving existing constants for comparison
 qm_x_const = [(qm, const) for const in res[qm] for qm in res]


 def retrieve_old_constant(qm, const):

    import time

    st = time.time()

    dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()

    data, mdata = get_from_db(
        karabo_id=karabo_id,
        karabo_da=qm_dict[qm]["karabo_da"],
        constant=dconst,
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
        strategy="pdu_prior_in_time",
        verbosity=1,
        timeout=cal_db_timeout
    )

    if mdata is None or data is None:
        timestamp = "Not found"
        filepath = None
        h5path = None
    else:
        timestamp = mdata.calibration_constant_version.begin_at.isoformat()
        filepath = os.path.join(
            mdata.calibration_constant_version.hdf5path,
            mdata.calibration_constant_version.filename
        )
        h5path = mdata.calibration_constant_version.h5path

    return data, timestamp, filepath, h5path, time.time() - st


 old_retrieval_pool = multiprocessing.Pool()
 old_retrieval_res = old_retrieval_pool.starmap_async(
    retrieve_old_constant, qm_x_const
 )
 old_retrieval_pool.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 # This is a temporary workaround to enable dark processing for the first SM AGIPD detector
 if nmods > 1:  # AGIPD1M and AGIPD500K
    mnames = []
    for i in modules:
        qm = module_index_to_qm(i)
        mnames.append(qm)
        display(Markdown(f'## Position of the module {qm} and its ASICs'))
    show_processed_modules(karabo_id, constants=None, mnames=mnames, mode="position")
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single cell overviews allow to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 cell = 3
 gain = 0
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 1
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 2
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))

 step_timer.done_step("Single-Cell Overviews.")
 ```

 %% Cell type:code id: tags:

 ``` python
 if high_res_badpix_3d:
    cols = {
        BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
        BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
    }

    display(Markdown("""

    ## Global Bad Pixel Behaviour ##

    The following plots show the results of bad pixel evaluation for all evaluated memory cells.
    Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2.
    This excludes single bad pixels present only in disconnected pixels.
    Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
    Colors encode the bad pixel type, or mixed type.

    """))

    gnames = ['High Gain', 'Medium Gain', 'Low Gain']
    for gain in range(3):
        display(Markdown(f'### {gnames[gain]} ###'))
        for mod, data in badpix_g.items():
            plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
            plt.show()
 ```

 %% Cell type:markdown id: tags:


 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    # Plot only three gain threshold maps.
    bp_thresh = OrderedDict()
    for mod, con in badpix_g.items():
        bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
        bp_thresh[mod][...,:2] = con[...,:2]
        bp_thresh[mod][...,2:] = con

    create_constant_overview(thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
                             badpixels=[bp_thresh, np.nan],
                             gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
                             marker=['d','d','','','']
                             )

 step_timer.done_step("Aggregate values, and per Cell behaviour.")
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
-    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
+    bad_pixel_aggregate_g[m] = d.astype(np.bool_).astype(np.float64)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables ##

 The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.

 %% Cell type:code id: tags:

 ``` python
 # now we need the old constants
 old_const = {}
 old_mdata = {}
 old_retrieval_res.wait()

 timings = []
 for (qm, const), (data, timestamp, filepath, h5path, timing) in zip(qm_x_const, old_retrieval_res.get()):
    old_const.setdefault(qm, {})[const] = data
    old_mdata.setdefault(qm, {})[const] = {
        "timestamp": timestamp,
        "filepath": filepath,
        "h5path": h5path
    }
    timings.append(timing)
 print(f"Retrieving old constant took around {np.asarray(timings).mean():.01f} s")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown("The following pre-existing constants are used for comparison:"))
 for qm, consts in old_mdata.items():
    display(Markdown(f"- {qm}"))
    for const in consts:
        display(Markdown(f"    - {const} at {consts[const]['timestamp']}"))
    # saving locations of old constants for summary notebook
    with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
        yaml.safe_dump(
            {
                "module": qm,
                "pdu": qm_dict[qm]["db_module"],
                "old-constants": old_mdata[qm]
            },
            fd,
        )
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 table = []
 gain_names = ['High', 'Medium', 'Low']
 bits = [
    BadPixels.NOISE_OUT_OF_THRESHOLD,
    BadPixels.OFFSET_OUT_OF_THRESHOLD,
    BadPixels.OFFSET_NOISE_EVAL_ERROR,
    BadPixels.GAIN_THRESHOLDING_ERROR,
 ]

 for qm in badpix_g.keys():
    for gain in range(3):
        l_data = []
        l_data_old = []

        data = np.copy(badpix_g[qm][:,:,:,gain])
        datau32 = data.astype(np.uint32)
        l_data.append(len(datau32[datau32>0].flatten()))
        for bit in bits:
            l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))

        if old_const[qm]['BadPixelsDark'] is not None:
            dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
            datau32old = dataold.astype(np.uint32)
            l_data_old.append(len(datau32old[datau32old>0].flatten()))
            for bit in bits:
                l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))

        l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
                       'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']

        l_threshold = ['', f'{thresholds_noise_sigma}' f'{thresholds_noise_hard[gain]}',
                       f'{thresholds_offset_sigma}' f'{thresholds_offset_hard[gain]}',
                       '', f'{thresholds_gain_sigma}']

        for i in range(len(l_data)):
            line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]

            if old_const[qm]['BadPixelsDark'] is not None:
                line += [l_data_old[i]]
            else:
                line += ['-']

            table.append(line)
        table.append(['', '', '', ''])

 display(Markdown('''
 ### Number of bad pixels

 One pixel can be bad for different reasons, therefore, the sum of all types of bad pixels can be more than the number of all bad pixels.

 '''))
 if len(table)>0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                         headers=["Pixel type", "Threshold",
                                                  "New constant", "Old constant"])))

 step_timer.done_step("Create badpixels table.")
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 header = ['Parameter',
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant "]

 if fixed_gain_mode:
    constants = ['Offset', 'Noise']
 else:
    constants = ['Offset', 'Noise', 'ThresholdsDark']

 constants_x_qms = list(itertools.product(constants, res.keys()))


 def compute_table(const, qm):
    if const == 'ThresholdsDark':
        table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
    else:
        table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]

    compare_with_old_constant = old_const[qm][const] is not None and \
        old_const[qm]['BadPixelsDark'] is not None

    data = np.copy(res[qm][const])

    if const == 'ThresholdsDark':
        data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
        data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
    else:
        data[res[qm]['BadPixelsDark']>0] = np.nan

    if compare_with_old_constant:
        data_old = np.copy(old_const[qm][const])
        if const == 'ThresholdsDark':
            data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
            data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
        else:
            data_old[old_const[qm]['BadPixelsDark']>0] = np.nan

    f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
    n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']

    def compute_row(i):
        line = [n_list[i]]
        for gain in range(3):
            # Compare only 3 threshold gain-maps
            if gain == 2 and const == 'ThresholdsDark':
                continue
            stat_measure = f_list[i](data[...,gain])
            line.append(f"{stat_measure:6.1f}")
            if compare_with_old_constant:
                old_stat_measure = f_list[i](data_old[...,gain])
                line.append(f"{old_stat_measure:6.1f}")
            else:
                line.append("-")
        return line


    with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
        rows = pool.map(compute_row, range(len(f_list)))

    table.extend(rows)

    return table


 with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
    tables = pool.starmap(compute_table, constants_x_qms)

 for (const, qm), table in zip(constants_x_qms, tables):
    display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))

 step_timer.done_step("Computing comparison tables.")
 ```

 %% Cell type:markdown id: tags:

 # AGIPD Characterize Dark Images #

 Author: European XFEL Detector Group, Version: 2.0

 The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets , noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.

 The evaluated calibration constants are stored locally and injected in the calibration data base.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
 out_folder = "" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 modules = [-1]  # list of modules to evaluate, RANGE ALLOWED
 run_high = 9985 # run number in which high gain data was recorded, required
 run_med = 9984 # run number in which medium gain data was recorded, required
 run_low = 9983 # run number in which low gain data was recorded, required
 operation_mode = "ADAPTIVE_GAIN"  # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")

 karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '

 use_dir_creation_date = True  # use dir creation date as data production reference date
 cal_db_interface = "tcp://max-exfl-cal001:8020" # the database interface to use
 cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
 sort_runs = True  # Sort the selected dark runs. This flag is added for old data (e.g. 900174 r0011).

 mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode, use -1 to use value stored in slow data.
 integration_time = -1 # integration time, negative values for auto-detection.
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017

 thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
 thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_offset_hard_hg = [3000, 7000]  # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_mg = [6000, 10000]  # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_lg = [6000, 10000]  # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_offset_hard_hg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_hg, but for fixed gain operation
 thresholds_offset_hard_mg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_mg, but for fixed gain operation
 thresholds_offset_hard_lg_fixed = [3500, 6500]  # Same as thresholds_offset_hard_lg, but for fixed gain operation

 thresholds_noise_sigma = 5. # noise sigma thresholds for offset deduced bad pixels
 thresholds_noise_hard = [0, 0] # For setting the same threshold noise for the 3 gains. Left for backward compatibility. Default [0, 0] to take the following parameters.
 thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
 thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_gain_sigma = 5.  # Gain separation sigma threshold
 max_trains = 550  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains. 550 added for ~500GB nodes to temporarily avoid memory issues.
 min_trains = 1  # Minimum number of trains for processing dark. If run folder has less than minimum trains, processing is stopped.
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells

 # This is used if modules is not specified:
 def find_modules(in_folder, run_high, modules):
    if (modules is not None) and modules != [-1]:
        return modules
    from pathlib import Path
    import re
    modules = set()
    for file in Path(in_folder, f'r{run_high:04d}').iterdir():
        m = re.search(r'-AGIPD(\d{2})-', file.name)
        if m:
            modules.add(int(m.group(1)))
    return sorted(modules)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import multiprocessing
 import os
 from collections import OrderedDict
 from datetime import timedelta
 from pathlib import Path
 from typing import Tuple

 import matplotlib
 import numpy as np
 import pasha as psh
 import tabulate
 import yaml
 from IPython.display import Latex, Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')

 import iCalibrationDB
 import matplotlib.pyplot as plt
 from cal_tools import step_timing
 from cal_tools.agipdlib import AgipdCtrlRuns
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import (
    create_constant_overview,
    plot_badpix_3d,
    show_overview,
    show_processed_modules,
 )
 from cal_tools.tools import (
    get_dir_creation_date,
    get_from_db,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    module_index_to_qm,
    run_prop_seq_from_path,
    save_const_to_h5,
    send_to_db,
 )

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # insert control device if format string (does nothing otherwise)
 ctrl_src = ctrl_source_template.format(karabo_id_control)

 run_numbers = [run_high, run_med, run_low]

 run, prop, seq = run_prop_seq_from_path(in_folder)

 report = get_report(metadata_folder)
 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 instrument = karabo_id.split("_")[0]

 if "AGIPD1M" in karabo_id:
    nmods = 16
 elif "AGIPD500K" in karabo_id:
    nmods = 8
 else:
    nmods = 1

 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 def create_karabo_da_list(modules):
    return(["AGIPD{:02d}".format(i) for i in modules])

 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    mod_indices = modules if nmods > 1 else [0]
    karabo_da = create_karabo_da_list(modules)
 else:
    # TODO: use CALCAT metadata for detector's modules indices.
    modules = [int(x[-2:]) for x in karabo_da]
    mod_indices = modules if nmods > 1 else [0]

 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")

 step_timer = step_timing.StepTimer()
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 # Create out_folder if it doesn't exist.
 Path(out_folder).mkdir(parents=True, exist_ok=True)

 mod_image_size = []
 for run in run_numbers:
    missing_modules = []  # modules with no images within a run.
    n_trains_list = []   # list of the number of trains for each module within a run.
    # This is important in case of no slurm parallelization over modules is done.
    # (e.g. running notebook interactively)
    for m in modules:
        # validate that there are trains for the selected modules and run.
        dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(
            instrument_src.format(m), "*", require_all=True)
        n_trains = len(dc.train_ids)

        if n_trains == 0:
            print(f"WARNING: No images for module AGIPD{m:02d}, run {run}.")
            missing_modules.append(m)
        # Raise a warning if the module has less trains than expected.
        elif n_trains < min_trains:
            print(f"WARNING: AGIPD{m:02d}, run {run} "
                  f"has trains less than minimum trains: {min_trains}.")
        else:
            print(f"Processing {max_trains if max_trains < n_trains else n_trains} "
                  f"for AGIPD{m:02d}, run {run} ")

        n_trains_list.append(n_trains)
        mod_image_size.append(np.product(dc[instrument_src.format(m), "image.data"].shape) * 2  / 1e9)

    if max(n_trains_list) == 0:
        raise ValueError(f"No images to process for run: {run}")
    elif max(n_trains_list) < min_trains:
        raise ValueError(f"{run} has less than minimum trains: {min_trains}")

 # Update modules and karabo_da lists based on available modules to processes.
 modules = [m for m in modules if m not in missing_modules]
 karabo_da = create_karabo_da_list(modules)
 print(f"Will process data of ({sum(mod_image_size):.02f} GB).")
 step_timer.done_step("Checking the data size and availability.")
 ```

 %% Cell type:markdown id: tags:

 ## Read and validate the runs control data.

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 # Read slow data from 1st channel of the selected module only.
 # Currently dark slurm jobs run per one module.
 first_mod_channel = sorted(modules)[0]

 instrument_src_mod = instrument_src.format(first_mod_channel)

 agipd_ctrl_dark = AgipdCtrlRuns(
    raw_folder=in_folder,
    runs=run_numbers,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    sort_dark_runs_enabled=sort_runs
 )
 # Update run_numbers list in case it was sorted.
 run_numbers = agipd_ctrl_dark.runs

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_numbers[0])

 print(f"Using {creation_time} as creation time of constant.")

 if mem_cells == 0:
    mem_cells = agipd_ctrl_dark.get_memory_cells()

 if acq_rate == 0:
    acq_rate = agipd_ctrl_dark.get_acq_rate()

 if bias_voltage == 0:
    bias_voltage = agipd_ctrl_dark.get_bias_voltage(
        karabo_id_control, module=first_mod_channel)

 fixed_gain_mode = False
 if gain_mode == -1:
    gain_mode = agipd_ctrl_dark.gain_modes
    fixed_gain_mode = agipd_ctrl_dark.fixed_gain_mode()

 if gain_setting == -1:
    gain_setting = agipd_ctrl_dark.get_gain_setting()

 if integration_time == -1:
    integration_time = agipd_ctrl_dark.get_integration_time()
 step_timer.done_step(f"Read operating conditions.")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Determine the gain operation mode based on the gain_mode stored in control h5file.
 if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
    print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")

 if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
    print(
        "WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but"
        "slow data indicates FIXED_GAIN. Processing fixed gain constants.")
 elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
    print(
        "WARNING: Operation_mode parameter is FIXED_GAIN, "
        "slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Parameters are:")
 print(f"Proposal: {prop}")
 print(f"Acquisition rate: {acq_rate}")
 print(f"Memory cells: {mem_cells}")
 print(f"Runs: {run_numbers}")
 print(f"Interlaced mode: {interlaced}")
 print(f"Using DB: {db_output}")
 print(f"Input: {in_folder}")
 print(f"Output: {out_folder}")
 print(f"Bias voltage: {bias_voltage}V")
 print(f"Gain setting: {gain_setting}")
 print(f"Integration time: {integration_time}")
 print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
 ```

 %% Cell type:code id: tags:

 ``` python
 if thresholds_offset_hard != [0, 0]:
    # if set, this will override the individual parameters
    thresholds_offset_hard = [thresholds_offset_hard] * 3
 elif fixed_gain_mode:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg_fixed,
        thresholds_offset_hard_mg_fixed,
        thresholds_offset_hard_lg_fixed,
    ]
 else:
    thresholds_offset_hard = [
        thresholds_offset_hard_hg,
        thresholds_offset_hard_mg,
        thresholds_offset_hard_lg,
    ]
 print("Will use the following hard offset thresholds")
 for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
    print(f"- {name} gain: {value}")

 if thresholds_noise_hard != [0, 0]:
    thresholds_noise_hard = [thresholds_noise_hard] * 3
 else:
    thresholds_noise_hard = [
        thresholds_noise_hard_hg,
        thresholds_noise_hard_mg,
        thresholds_noise_hard_lg,
    ]
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 parallel_num_procs = min(6, len(modules)*3)
 parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
 print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")

 def characterize_module(
    mod_index: int, channel: int, gain_run: Tuple[int, int],
 ) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:

    gain_index, run = gain_run
    # Select the corresponding module channel.
    instrument_src_mod = instrument_src.format(channel)

    run_dc = RunDirectory(f'{in_folder}/r{run:04d}/').select(instrument_src_mod, require_all=True)
    if max_trains != 0:
        run_dc = run_dc.select_trains(np.s_[:max_trains])

    # Read module's image and cellId data.
    im = run_dc[instrument_src_mod, "image.data"].ndarray()
    cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())

    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]

    if interlaced:
        if not fixed_gain_mode:
            ga = im[1::2, 0, ...]
        im = im[0::2, 0, ...].astype(np.float32)
        cell_ids = cell_ids[::2]
    else:
        if not fixed_gain_mode:
            ga = im[:, 1, ...]
        im = im[:, 0, ...].astype(np.float32)
    im = np.transpose(im)
    if not fixed_gain_mode:
        ga = np.transpose(ga)

    context = psh.context.ThreadContext(num_workers=parallel_num_threads)
    offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
    noise = context.alloc(like=offset)

    if fixed_gain_mode:
        gains = None
        gains_std = None
    else:
        gains = context.alloc(like=offset)
        gains_std = context.alloc(like=offset)

    def process_cell(worker_id, array_index, cell_number):
        cell_slice_index = (cell_ids == cell_number)
        im_slice = im[..., cell_slice_index]
        offset[..., cell_number] = np.median(im_slice, axis=2)
        noise[..., cell_number] = np.std(im_slice, axis=2)
        if not fixed_gain_mode:
            ga_slice = ga[..., cell_slice_index]
            gains[..., cell_number] = np.median(ga_slice, axis=2)
            gains_std[..., cell_number] = np.std(ga_slice, axis=2)
    unique_cell_ids = np.unique(cell_ids)

    # We assume cells are accepted starting 0.
    if np.any(unique_cell_ids > mem_cells):
        raise ValueError(
            f"Invalid cells found {unique_cell_ids} "
            f"for run: {run}.")

    context.map(process_cell, unique_cell_ids)

    # bad pixels
    bp = np.zeros_like(offset, dtype=np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[(offset < local_thresholds_offset_hard[0]) |
       (offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))
    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR

    return mod_index, gain_index, offset, noise, gains, gains_std, bp
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 with multiprocessing.Pool(processes=parallel_num_procs) as pool:
    results = pool.starmap(
        characterize_module, itertools.product(mod_indices, modules, list(enumerate(run_numbers))))

 step_timer.done_step("Processing dark from the 3 runs.")

 # mapped values for processing 2 modules example:
 # [(0, (0, 9013))
 #     0, (0, run-high),
 #     0, (1, run-med),
 #     0, (2, run-low),
 #     1, (0, run-high),
 #     1, (1, run-med),
 #     1, (2, run-low),,
 # ]
 ```

 %% Cell type:code id: tags:

 ``` python
 offset_g = OrderedDict()
 noise_g = OrderedDict()
 badpix_g = OrderedDict()
 if not fixed_gain_mode:
    gain_g = OrderedDict()
    gainstd_g = OrderedDict()


 for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
    qm = module_index_to_qm(module_index)
    if qm not in offset_g:
        offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
        noise_g[qm] = np.zeros_like(offset_g[qm])
        badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
        if not fixed_gain_mode:
            gain_g[qm] = np.zeros_like(offset_g[qm])
            gainstd_g[qm] = np.zeros_like(offset_g[qm])

    offset_g[qm][..., gain_index] = offset
    noise_g[qm][..., gain_index] = noise
    badpix_g[qm][..., gain_index] = bp
    if not fixed_gain_mode:
        gain_g[qm][..., gain_index] = gains
        gainstd_g[qm][..., gain_index] = gains_std
 ```

 %% Cell type:code id: tags:

 ``` python
 # Add bad pixels due to bad gain separation
 if not fixed_gain_mode:
    for qm in gain_g.keys():
        for g in range(2):
            # Bad pixels during bad gain separation.
            # Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
            bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
                np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
            badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
                BadPixels.GAIN_THRESHOLDING_ERROR
 ```

 %% Cell type:markdown id: tags:

 The thresholds for gain switching are then defined as the mean value between in individual gain bit levels. Note that these thresholds need to be refined with charge induced thresholds, as the two are not the same.

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    thresholds_g = {}
    for qm in gain_g.keys():
        thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
        thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
        thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
        for i in range(3):
            thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in mod_indices:
    qm = module_index_to_qm(i)
    res[qm] = {
        'Offset': offset_g[qm],
        'Noise': noise_g[qm],
        'BadPixelsDark': badpix_g[qm]
    }
    if not fixed_gain_mode:
        res[qm]['ThresholdsDark'] = thresholds_g[qm]
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 # note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
 condition = iCalibrationDB.Conditions.Dark.AGIPD(
    memory_cells=mem_cells,
    bias_voltage=bias_voltage,
    acquisition_rate=acq_rate,
    gain_setting=gain_setting,
    gain_mode=fixed_gain_mode,
    integration_time=integration_time
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
 qm_dict = OrderedDict()
 all_pdus = get_pdu_from_db(
    karabo_id,
    karabo_da,
    constant=iCalibrationDB.CalibrationConstant(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time.isoformat() if creation_time else None,
    timeout=cal_db_timeout
 )
 for module_index, module_da, module_pdu in zip(mod_indices, karabo_da, all_pdus):
    qm = module_index_to_qm(module_index)
    qm_dict[qm] = {
        "karabo_da": module_da,
        "db_module": module_pdu
    }
 ```

 %% Cell type:markdown id: tags:

 ## Sending calibration constants to the database.

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 md = None
 # Location of source data, injected with the constants
 file_loc = f"proposal:{prop} runs:{' '.join([str(r) for r in reversed(run_numbers)])}"

 for qm in res:
    db_module = qm_dict[qm]["db_module"]
    for const in res[qm]:
        dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
        dconst.data = res[qm][const]

        if db_output:
            md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
                            report, cal_db_interface, creation_time=creation_time,
                            timeout=cal_db_timeout)

        if local_output:
            md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
                                  file_loc, report, creation_time, out_folder)
            print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")

 print("Constants parameter conditions are:\n")
 print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
      f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
      f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
      f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")\

 step_timer.done_step("Inject calibration constants to the database.")
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving previous calibration constants for comparison.

 %% Cell type:code id: tags:

 ``` python
 # Start retrieving existing constants for comparison
 qm_x_const = [(qm, const) for const in res[qm] for qm in res]


 def retrieve_old_constant(qm, const):

    import time

    st = time.time()

    dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()

    data, mdata = get_from_db(
        karabo_id=karabo_id,
        karabo_da=qm_dict[qm]["karabo_da"],
        constant=dconst,
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
        strategy="pdu_prior_in_time",
        verbosity=1,
        timeout=cal_db_timeout
    )

    if mdata is None or data is None:
        timestamp = "Not found"
        filepath = None
        h5path = None
    else:
        timestamp = mdata.calibration_constant_version.begin_at.isoformat()
        filepath = os.path.join(
            mdata.calibration_constant_version.hdf5path,
            mdata.calibration_constant_version.filename
        )
        h5path = mdata.calibration_constant_version.h5path

    return data, timestamp, filepath, h5path, time.time() - st


 old_retrieval_pool = multiprocessing.Pool()
 old_retrieval_res = old_retrieval_pool.starmap_async(
    retrieve_old_constant, qm_x_const
 )
 old_retrieval_pool.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 # This is a temporary workaround to enable dark processing for the first SM AGIPD detector
 if nmods > 1:  # AGIPD1M and AGIPD500K
    mnames = []
    for i in modules:
        qm = module_index_to_qm(i)
        mnames.append(qm)
        display(Markdown(f'## Position of the module {qm} and its ASICs'))
    show_processed_modules(karabo_id, constants=None, mnames=mnames, mode="position")
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single cell overviews allow to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 cell = 3
 gain = 0
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 1
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 2
 show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))

 step_timer.done_step("Single-Cell Overviews.")
 ```

 %% Cell type:code id: tags:

 ``` python
 if high_res_badpix_3d:
    cols = {
        BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
        BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
    }

    display(Markdown("""

    ## Global Bad Pixel Behaviour ##

    The following plots show the results of bad pixel evaluation for all evaluated memory cells.
    Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2.
    This excludes single bad pixels present only in disconnected pixels.
    Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
    Colors encode the bad pixel type, or mixed type.

    """))

    gnames = ['High Gain', 'Medium Gain', 'Low Gain']
    for gain in range(3):
        display(Markdown(f'### {gnames[gain]} ###'))
        for mod, data in badpix_g.items():
            plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
            plt.show()
 ```

 %% Cell type:markdown id: tags:


 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 if not fixed_gain_mode:
    # Plot only three gain threshold maps.
    bp_thresh = OrderedDict()
    for mod, con in badpix_g.items():
        bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
        bp_thresh[mod][...,:2] = con[...,:2]
        bp_thresh[mod][...,2:] = con

    create_constant_overview(thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
                             badpixels=[bp_thresh, np.nan],
                             gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
                             marker=['d','d','','','']
                             )

 step_timer.done_step("Aggregate values, and per Cell behaviour.")
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
-    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
+    bad_pixel_aggregate_g[m] = d.astype(np.bool_).astype(np.float64)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables ##

 The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.

 %% Cell type:code id: tags:

 ``` python
 # now we need the old constants
 old_const = {}
 old_mdata = {}
 old_retrieval_res.wait()

 timings = []
 for (qm, const), (data, timestamp, filepath, h5path, timing) in zip(qm_x_const, old_retrieval_res.get()):
    old_const.setdefault(qm, {})[const] = data
    old_mdata.setdefault(qm, {})[const] = {
        "timestamp": timestamp,
        "filepath": filepath,
        "h5path": h5path
    }
    timings.append(timing)
 print(f"Retrieving old constant took around {np.asarray(timings).mean():.01f} s")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown("The following pre-existing constants are used for comparison:"))
 for qm, consts in old_mdata.items():
    display(Markdown(f"- {qm}"))
    for const in consts:
        display(Markdown(f"    - {const} at {consts[const]['timestamp']}"))
    # saving locations of old constants for summary notebook
    with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
        yaml.safe_dump(
            {
                "module": qm,
                "pdu": qm_dict[qm]["db_module"],
                "old-constants": old_mdata[qm]
            },
            fd,
        )
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 table = []
 gain_names = ['High', 'Medium', 'Low']
 bits = [
    BadPixels.NOISE_OUT_OF_THRESHOLD,
    BadPixels.OFFSET_OUT_OF_THRESHOLD,
    BadPixels.OFFSET_NOISE_EVAL_ERROR,
    BadPixels.GAIN_THRESHOLDING_ERROR,
 ]

 for qm in badpix_g.keys():
    for gain in range(3):
        l_data = []
        l_data_old = []

        data = np.copy(badpix_g[qm][:,:,:,gain])
        datau32 = data.astype(np.uint32)
        l_data.append(len(datau32[datau32>0].flatten()))
        for bit in bits:
            l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))

        if old_const[qm]['BadPixelsDark'] is not None:
            dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
            datau32old = dataold.astype(np.uint32)
            l_data_old.append(len(datau32old[datau32old>0].flatten()))
            for bit in bits:
                l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))

        l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
                       'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']

        l_threshold = ['', f'{thresholds_noise_sigma}' f'{thresholds_noise_hard[gain]}',
                       f'{thresholds_offset_sigma}' f'{thresholds_offset_hard[gain]}',
                       '', f'{thresholds_gain_sigma}']

        for i in range(len(l_data)):
            line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]

            if old_const[qm]['BadPixelsDark'] is not None:
                line += [l_data_old[i]]
            else:
                line += ['-']

            table.append(line)
        table.append(['', '', '', ''])

 display(Markdown('''
 ### Number of bad pixels

 One pixel can be bad for different reasons, therefore, the sum of all types of bad pixels can be more than the number of all bad pixels.

 '''))
 if len(table)>0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                         headers=["Pixel type", "Threshold",
                                                  "New constant", "Old constant"])))

 step_timer.done_step("Create badpixels table.")
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 header = ['Parameter',
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant "]

 if fixed_gain_mode:
    constants = ['Offset', 'Noise']
 else:
    constants = ['Offset', 'Noise', 'ThresholdsDark']

 constants_x_qms = list(itertools.product(constants, res.keys()))


 def compute_table(const, qm):
    if const == 'ThresholdsDark':
        table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
    else:
        table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]

    compare_with_old_constant = old_const[qm][const] is not None and \
        old_const[qm]['BadPixelsDark'] is not None

    data = np.copy(res[qm][const])

    if const == 'ThresholdsDark':
        data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
        data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
    else:
        data[res[qm]['BadPixelsDark']>0] = np.nan

    if compare_with_old_constant:
        data_old = np.copy(old_const[qm][const])
        if const == 'ThresholdsDark':
            data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
            data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
        else:
            data_old[old_const[qm]['BadPixelsDark']>0] = np.nan

    f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
    n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']

    def compute_row(i):
        line = [n_list[i]]
        for gain in range(3):
            # Compare only 3 threshold gain-maps
            if gain == 2 and const == 'ThresholdsDark':
                continue
            stat_measure = f_list[i](data[...,gain])
            line.append(f"{stat_measure:6.1f}")
            if compare_with_old_constant:
                old_stat_measure = f_list[i](data_old[...,gain])
                line.append(f"{old_stat_measure:6.1f}")
            else:
                line.append("-")
        return line


    with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
        rows = pool.map(compute_row, range(len(f_list)))

    table.extend(rows)

    return table


 with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
    tables = pool.starmap(compute_table, constants_x_qms)

 for (const, qm), table in zip(constants_x_qms, tables):
    display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
    display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))

 step_timer.done_step("Computing comparison tables.")
 ```

--- a/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_Summary.ipynb
+++ b/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_Summary.ipynb
 %% Cell type:markdown id: tags:

 # Pulse Capacitor Characterisation Summary

 This notebook is used as a dependency notebook for a pulse capacitor characterisation to provide summary for all modules of the AGIPD.

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/HED/202430/p900438/raw/" # path to input data, required
-out_folder = "/gpfs/exfel/exp/HED/202430/p900438/scratch/rovensky/pc/" # path to output to, required
+in_folder = "/gpfs/exfel/exp/SPB/202431/p900461/raw/" # path to input data, required
+out_folder = "/gpfs/exfel/exp/SPB/202431/p900461/usr/PC/352cells1.1MHz_gs0_20clk/" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 runs = [] # runs to use, required, range allowed

-karabo_id_control = "HED_DET_AGIPD65K1"  # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
-karabo_id = "HED_DET_AGIPD65K1"
+karabo_id_control = "SPB_IRU_AGIPD1M1"  # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
+karabo_id = "SPB_DET_AGIPD1M-1"
 ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information

 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_timeout = 3000000 # timeout on caldb requests"
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045"
 db_output = False

 bias_voltage = -1  # detector bias voltage, negative values for auto-detection.
 mem_cells = -1  # number of memory cells used, negative values for auto-detection.
 acq_rate = -1.  # the detector acquisition rate, negative values for auto-detection.
 gain_setting = -1  # gain setting can have value 0 or 1, negative values for auto-detection.
 integration_time = -1  # integration time, negative values for auto-detection.
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 from pathlib import Path
 from dateutil import parser
 from datetime import timedelta

 import h5py
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
 import matplotlib.cm as cm
 import numpy as np
 import tabulate
 import multiprocessing

 from cal_tools.agipdlib import AgipdCtrl
 from cal_tools.ana_tools import get_range
 from cal_tools.tools import (
    calcat_creation_time,
    module_index_to_qm,
    get_from_db,
    get_pdu_from_db,
    get_report,
    send_to_db
 )
 from cal_tools.plotting import agipd_single_module_geometry

 from extra_data import RunDirectory
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from IPython.display import Latex, display
 from XFELDetAna.plotting.simpleplot import simplePlot

 from iCalibrationDB import Conditions, Constants

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # Evaluate creation time
 creation_time = calcat_creation_time(in_folder, runs[0], creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}\n")

 # Get operation conditions
 ctrl_source = ctrl_source_template.format(karabo_id_control)
 run_folder = f'{in_folder}/r{runs[0]:04d}/'
 raw_dc = RunDirectory(run_folder)
 instrument_src_mod = list(raw_dc.detector_sources)[0]

 agipd_cond = AgipdCtrl(
    run_dc=raw_dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_source,
    raise_error=False,  # to be able to process very old data without mosetting value
 )
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 if mem_cells is None:
    raise ValueError(f"No raw images found in {run_folder}")
 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()

 # Evaluate detector instance for mapping
 instance = karabo_id_control.split("_")[-1]
 agipd_instances = {'AGIPD65K1': [1, agipd_single_module_geometry()],
                   'AGIPD500K2G': [8, AGIPD_500K2GGeometry.from_origin()],
                   'AGIPD1M1': [16, AGIPD_1MGeometry.from_quad_positions(quad_pos=[(-525, 625),
                                                                                   (-550, -10),
                                                                                   (520, -160),
                                                                                   (542.5, 475),
                                                                                  ])]
                  }

 try:
    nmods = agipd_instances[instance][0]
    geom =  agipd_instances[instance][1]
 except KeyError as ke:
    print('The provided AGIPD instance is not recognised. Available instances are: \
    AGIPD65K1,\nAGIPD500K2G,\nAGIPD1M1')

 print(f"Using {creation_time} as creation time\n")
 print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
      f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Integration time: {integration_time}\n"
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # ml - high gain slope
 # bl - high gain intercept
 # devl - absolute relative deviation from linearity for high gain
 # mh - medium gain slope
 # bh - medium gain intercept
 # oh, ch, ah - parameters of hook function fit to medium gain (only if requested)
 # devh - absolute relative deviation from linearity for linear part of medium gain

 keys = ['ml', 'bl', 'mh', 'bh', 'BadPixelsPC']
 keys_file = ["ml", "bl", "devl", "mh", "bh", "oh", "ch", "ah", "devh"]

 fit_data = {}
 bad_pixels = {}
 modules = []
 karabo_da = []
 constants_files = []

 out_folder = Path(out_folder)
 constants_files = sorted(out_folder.glob('*.h5'),
                         key=lambda f: (len(f.stem), f.stem))

 for f in constants_files:
    mod = int(f.stem.split("_")[-1])
    qm = module_index_to_qm(mod)
    fit_data[mod] = {}
    with h5py.File(f, 'r') as hf:
        bad_pixels[mod] = hf[f'/{qm}/BadPixelsPC/0/data'][()]
        for key in keys_file:
            fit_data[mod][key]= hf[f'/{qm}/{key}/0/data'][()]

    modules.append(mod)
    karabo_da.append(f"AGIPD{mod:02d}")
    print(f'Data available for AGIPD{mod:02d}')
 ```

 %% Cell type:code id: tags:

 ``` python
 def slope_dict_to_arr(d):
    key_to_index = {
                    "ml": 0,
                    "bl": 1,
                    "devl": 2,
                    "mh": 3,
                    "bh": 4,
                    "oh": 5,
                    "ch": 6,
                    "ah": 7,
                    "devh": 8,
                    "tresh": 9
    }

    arr = np.zeros([11]+list(d["ml"].shape), np.float32)
    for key, item in d.items():
        if key not in key_to_index:
            continue
        arr[key_to_index[key],...] = item

    return arr
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 condition = Conditions.Dark.AGIPD(memory_cells=mem_cells,
                                  bias_voltage=bias_voltage,
                                  acquisition_rate=acq_rate,
                                  gain_setting=gain_setting,
                                  integration_time=integration_time)

 db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesPC(),
                             condition, cal_db_interface,
                             snapshot_at=creation_time)
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))

 report = get_report(metadata_folder)
 ```

 %% Cell type:code id: tags:

 ``` python
 md = None

 if db_output:
    for mod, pdu in zip(modules, db_modules):
        for const in ["SlopesPC", "BadPixelsPC"]:

            dbconst = getattr(Constants.AGIPD, const)()

            if const == "SlopesPC":
                dbconst.data = slope_dict_to_arr(fit_data[mod])
            else:
                dbconst.data = bad_pixels[mod]


            md = send_to_db(pdu, karabo_id, dbconst, condition,
                            file_loc, report, cal_db_interface,
                            creation_time=creation_time,
                           variant=1)

    print("Constants injected with the following conditions:\n")
    print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
          f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
          f"• integration_time: {integration_time}\n"
          f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
 else:
    print('Injection to DB not requested.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Remove keys which won't be used for comparison plots and add BP to the rest of data
 for mod in modules:
    fit_data[mod]['BadPixelsPC'] = bad_pixels[mod]

    for key in keys_file:
        if key not in keys:
            del fit_data[mod][key]

    for key in keys:
        fit_data[mod][key] = fit_data[mod][key].swapaxes(1,2)
 ```

 %% Cell type:code id: tags:

 ``` python
 def retrieve_old_PC(mod):
    dconst = getattr(Constants.AGIPD, 'SlopesPC')()
-    old_PC = get_from_db(karabo_id=karabo_id,
+    old_PC, _ = get_from_db(karabo_id=karabo_id,
            karabo_da=karabo_da[mod],
            constant=dconst,
            condition=condition,
            empty_constant=None,
            cal_db_interface=cal_db_interface,
            creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
            strategy="pdu_prior_in_time",
            verbosity=1,
            timeout=cal_db_timeout)
    return old_PC

 with multiprocessing.Pool(processes=len(modules)) as pool:
    old_PC_consts = pool.map(retrieve_old_PC, range(len(modules)))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create the arrays that will be used for figures.
 # Each array correponds to the data for all processed modules.

 pixel_range = [0,0,512,128]
 const_data = {}
 old_const = {}
-const_order = [0, 1, 3, 4]
+const_order = [0, 1, 3, 4] # this holds the position of slopes and intercepts in PC const array.

 for (key, c) in zip(keys, const_order):
    const_data[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
    old_const[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
    for cnt, mn in enumerate(modules):
        if key in fit_data[mn]:
            const_data[key][cnt,:,pixel_range[0]:pixel_range[2],
                               pixel_range[1]:pixel_range[3]] = fit_data[mn][key]
-            if np.any(old_PC_consts[0][0]):
+            if np.any(old_PC_consts[cnt]):
                old_const[key][cnt,:,pixel_range[0]:pixel_range[2],
-                               pixel_range[1]:pixel_range[3]] = old_PC_consts[cnt][0][c].swapaxes(1,2)
+                               pixel_range[1]:pixel_range[3]] = old_PC_consts[cnt][c].swapaxes(1,2)

 const_data['BadPixelsPC'] = np.full((nmods, mem_cells, 512, 128), np.nan)
 for cnt, mn in enumerate(modules):
    const_data['BadPixelsPC'][cnt,:,pixel_range[0]:pixel_range[2],
                              pixel_range[1]:pixel_range[3]] = fit_data[mn]['BadPixelsPC']
 ```

 %% Cell type:markdown id: tags:

 ## Summary across pixels ##

 %% Cell type:code id: tags:

 ``` python
 gain_data = {'HG': {},
             'MG': {},
             'BP': {}
            }

 old_gain_data = {'HG': {},
                 'MG': {}
                }

 for key in ['ml', 'bl']:
    gain_data['HG'][key] = const_data[key]
    old_gain_data['HG'][key] = old_const[key]
 for key in ['mh', 'bh']:
    gain_data['MG'][key] = const_data[key]
    old_gain_data['MG'][key] = old_const[key]
 gain_data['BP']['BadPixelsPC'] = const_data['BadPixelsPC']
 ```

 %% Cell type:code id: tags:

 ``` python
 def plot_definition(data, g, key):
    titel = ['Difference to previous', 'Percentage difference']
    gs = gridspec.GridSpec(1, 2)
    fig = plt.figure(figsize=(15, 7))
    plt.suptitle(f'{g}', fontsize=16)

    for pos in range(0,2):
        vmin, vmax = get_range(data[pos], 2)
        vmax = max(vmax, abs(vmin))
        ax = fig.add_subplot(gs[0, pos])
        ticks = [-1, 0, 1] if np.isnan(vmin) else [vmin, (vmin+vmax)/2, vmax]
        geom.plot_data_fast(data[pos],
                                vmin=vmin, vmax=vmax, ax=ax, cmap="RdBu", figsize=(13,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1,
                                         })
        colorbar = ax.images[0].colorbar
        colorbar.ax.set_yticklabels(["{:.1f}".format(tk) for tk in colorbar.get_ticks()])
        if pos == 1:
            colorbar.set_label('%')
        ax.set_title(f"{titel[pos]}: {key}", fontsize=14)
        ax.set_xlabel("Columns", fontsize=13)
        ax.set_ylabel("Rows", fontsize=13)

 def plot_diff_consts(old_const, new_const, g, ratio=False):
    if ratio:
        old_data = old_const['HG']['ml'] / old_const['MG']['mh']
        new_data = new_const['HG']['ml'] / new_const['MG']['mh']
        data1 = np.nanmean((new_data - old_data), axis=1)
        data2 = np.nanmean((new_data - old_data)/old_data*100, axis=1)
        data = [data1, data2]
        key ='Slopes ratio HG/MG'
        plot_definition(data, g, key)
    else:
        for i, key in enumerate(old_const[g].keys()):
            data1 = np.nanmean((new_const[g][key] - old_const[g][key]), axis=1)
            data2 = np.nanmean((new_const[g][key] - old_const[g][key])/old_const[g][key]*100, axis=1)
            data = [data1, data2]
            plot_definition(data, g, key)

 ```

 %% Cell type:code id: tags:

 ``` python
 for gain in old_gain_data.keys():
    plot_diff_consts(old_gain_data, gain_data, gain)
 ```

 %% Cell type:code id: tags:

 ``` python
 plot_diff_consts(old_gain_data, gain_data, 'Ratio HG/MG', ratio=True)
 ```

 %% Cell type:code id: tags:

 ``` python
 g_label = ['High gain', 'Medium gain', 'Bad pixels PC']
 for idx, g in enumerate(gain_data.keys()):
    gs = gridspec.GridSpec(1, 2)
    fig = plt.figure(figsize=(15, 7))
    plt.suptitle(f'{g_label[idx]}', fontsize=16)

    for i, key in enumerate(gain_data[g].keys()):
        if key is 'BadPixelsPC':
            data = np.nanmean(gain_data[g][key]>0, axis=1)
            vmin, vmax = (0,1)
            ax = fig.add_subplot(gs[0, :])
            ticks = [0, 0.5, 1]

        else:
            data = np.nanmean(gain_data[g][key], axis=1)
            vmin, vmax = get_range(data, 5)
            ax = fig.add_subplot(gs[0, i])
        geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(13,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1,

                                         })
        colorbar = ax.images[0].colorbar
        ax.set_title(key, fontsize=14)
        ax.set_xlabel('Columns', fontsize=13)
        ax.set_ylabel('Rows', fontsize=13)

    plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Summary across cells ##

 Good pixels only.

 %% Cell type:code id: tags:

 ``` python
 ratio = gain_data['HG']['ml'] / gain_data['MG']['mh']

 fig = plt.figure(figsize=(7, 7))
 ax = fig.add_subplot(111)
 data = np.nanmean(ratio, axis=1)
 vmin, vmax = get_range(data, 5)
 ax = geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(6,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1
                                         })
 colorbar = ax.images[0].colorbar
 colorbar.set_label('HG slope / MG slope', fontsize=13)
 ax.set_title('High/Medium Gain Slope Ratio', fontsize=14)
 ax.set_xlabel('Columns', fontsize=13)
 ax.set_ylabel('Rows', fontsize=13)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 for idx, g in enumerate(gain_data.keys()):

    for key in gain_data[g].keys():
        data = np.copy(gain_data[g][key])
        if key=='BadPixelsPC':
            data = data>0
        else:
            data[gain_data['BP']['BadPixelsPC']>0] = np.nan

        d = []
-        for i in range(nmods):
+        for i, m in enumerate(modules):
            d.append({'x': np.arange(data[i].shape[0]),
                      'y': np.nanmean(data[i], axis=(1,2)),
                      'drawstyle': 'steps-pre',
-                      'label': f'{modules[i]}',
+                      'label': f'{m}',
                      'linewidth': 2,
                      'linestyle': '--' if i>7 else '-'
                      })

        fig = plt.figure(figsize=(12, 6))
        plt.suptitle(f'{g_label[idx]} - {key}', fontsize=16)
        ax = fig.add_subplot(111)

        _ = simplePlot(d, xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label=key,
                            use_axis=ax,
                            legend='top-left-frame-ncol8',)
        ylim = ax.get_ylim()
        ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
        ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 d = []
-for i in range(nmods):
+for i, m in enumerate(modules):
    d.append({'x': np.arange(ratio[i].shape[0]),
              'y': np.nanmedian(ratio[i], axis=(1,2)),
              'drawstyle': 'steps-pre',
-              'label': f'{modules[i]}',
+              'label': f'{m}',
              'linewidth': 2,
              'linestyle': '--' if i>7 else '-'
              })

 fig = plt.figure(figsize=(12, 6))
 plt.suptitle('High/Medium Gain Slope Ratio', fontsize=16)
 ax = fig.add_subplot(111)

 _ = simplePlot(d, xrange=(-12, 510),
                    x_label='Memory Cell ID',
                    y_label='Gain ratio ml/mh',
                    use_axis=ax,
                    legend='top-left-frame-ncol8',)
 ylim = ax.get_ylim()
 ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
 ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 table = []
 ratio_old = old_gain_data['HG']['ml'] / old_gain_data['MG']['mh']
 for cnt, mod in enumerate(modules):

    table.append((mod,
                  f"{np.nanmedian(ratio[cnt]):0.1f} +- {np.nanstd(ratio[cnt]):0.2f}",
                  f"{np.nanmedian(ratio_old[cnt]):0.1f} +- {np.nanstd(ratio_old[cnt]):0.2f}",
                  f"{np.nanmedian(gain_data['BP']['BadPixelsPC'][cnt]>0)*100:0.1f} ({np.nansum(gain_data['BP']['BadPixelsPC'][cnt]>0)})"
                ))

 all_HM = []
 all_HM_old = []
 for mod in range(len(modules)):
    all_HM.extend(ratio[mod])
    all_HM_old.extend(ratio_old[mod])
 all_HM = np.array(all_HM)
 all_HM_old = np.array(all_HM_old)

 all_MSK = np.array([list(msk) for msk in gain_data['BP']['BadPixelsPC']])

 table.append(('overall',
              f"{np.nanmean(all_HM):0.1f} +- {np.nanstd(all_HM):0.2f}",
              f"{np.nanmean(all_HM_old):0.1f} +- {np.nanstd(all_HM_old):0.2f}",
              f"{np.nanmean(all_MSK>0)*100:0.1f} ({np.nansum(all_MSK>0)})"
            ))

 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Module",
                                              "HG/MG Ratio",
                                              "Previous HG/MG Ratio",
                                              "Bad pixels [%(Count)]"])))
 ```

 %% Cell type:markdown id: tags:

 ## Summary high-medium gain ratio (good pixels only) + histograms

 %% Cell type:code id: tags:

 ``` python
 colors = cm.rainbow(np.linspace(0, 1, nmods))

 gs = gridspec.GridSpec(1, 2)
 fig = plt.figure(figsize=(17, 8))


 ratio[gain_data['BP']['BadPixelsPC'] > 0] = np.nan
 data = np.nanmean(ratio, axis=1)
 vmin, vmax = get_range(data, 5)
 ax = fig.add_subplot(gs[0, 0])
 geom.plot_data_fast(data,
                        vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12.5,7),
                        colorbar={'shrink': 1,
                                  'pad': 0.04,
                                  'fraction': 0.1
                                 })
 colorbar = ax.images[0].colorbar
 colorbar.set_label('HG/MG', fontsize=12)
 ax.set_xlabel('Columns', fontsize=12)
 ax.set_ylabel('Rows', fontsize=12)

 ax = fig.add_subplot(gs[0,1])
 for cnt, mod in enumerate(modules):
    h, e = np.histogram(ratio[cnt].flatten(), bins=100, range=(vmin, vmax))
    ax.plot(e[:-1], h, color=colors[cnt],linewidth=2, label=f'{mod}', alpha=0.8)
    ax.set_xlabel('High/Medium Gain Ratio', fontsize=13)
    ax.set_ylabel('Counts', fontsize=13)
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
 ax.grid()
 ax.legend()
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 # Pulse Capacitor Characterisation Summary

 This notebook is used as a dependency notebook for a pulse capacitor characterisation to provide summary for all modules of the AGIPD.

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/HED/202430/p900438/raw/" # path to input data, required
-out_folder = "/gpfs/exfel/exp/HED/202430/p900438/scratch/rovensky/pc/" # path to output to, required
+in_folder = "/gpfs/exfel/exp/SPB/202431/p900461/raw/" # path to input data, required
+out_folder = "/gpfs/exfel/exp/SPB/202431/p900461/usr/PC/352cells1.1MHz_gs0_20clk/" # path to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 runs = [] # runs to use, required, range allowed

-karabo_id_control = "HED_DET_AGIPD65K1"  # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
-karabo_id = "HED_DET_AGIPD65K1"
+karabo_id_control = "SPB_IRU_AGIPD1M1"  # karabo-id for the control device e.g. "MID_EXP_AGIPD1M1", or "SPB_IRU_AGIPD1M1"
+karabo_id = "SPB_DET_AGIPD1M-1"
 ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information

 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
 cal_db_timeout = 3000000 # timeout on caldb requests"
 cal_db_interface = "tcp://max-exfl-cal001:8015#8045"
 db_output = False

 bias_voltage = -1  # detector bias voltage, negative values for auto-detection.
 mem_cells = -1  # number of memory cells used, negative values for auto-detection.
 acq_rate = -1.  # the detector acquisition rate, negative values for auto-detection.
 gain_setting = -1  # gain setting can have value 0 or 1, negative values for auto-detection.
 integration_time = -1  # integration time, negative values for auto-detection.
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 from pathlib import Path
 from dateutil import parser
 from datetime import timedelta

 import h5py
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
 import matplotlib.cm as cm
 import numpy as np
 import tabulate
 import multiprocessing

 from cal_tools.agipdlib import AgipdCtrl
 from cal_tools.ana_tools import get_range
 from cal_tools.tools import (
    calcat_creation_time,
    module_index_to_qm,
    get_from_db,
    get_pdu_from_db,
    get_report,
    send_to_db
 )
 from cal_tools.plotting import agipd_single_module_geometry

 from extra_data import RunDirectory
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from IPython.display import Latex, display
 from XFELDetAna.plotting.simpleplot import simplePlot

 from iCalibrationDB import Conditions, Constants

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 # Evaluate creation time
 creation_time = calcat_creation_time(in_folder, runs[0], creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}\n")

 # Get operation conditions
 ctrl_source = ctrl_source_template.format(karabo_id_control)
 run_folder = f'{in_folder}/r{runs[0]:04d}/'
 raw_dc = RunDirectory(run_folder)
 instrument_src_mod = list(raw_dc.detector_sources)[0]

 agipd_cond = AgipdCtrl(
    run_dc=raw_dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_source,
    raise_error=False,  # to be able to process very old data without mosetting value
 )
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 if mem_cells is None:
    raise ValueError(f"No raw images found in {run_folder}")
 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()

 # Evaluate detector instance for mapping
 instance = karabo_id_control.split("_")[-1]
 agipd_instances = {'AGIPD65K1': [1, agipd_single_module_geometry()],
                   'AGIPD500K2G': [8, AGIPD_500K2GGeometry.from_origin()],
                   'AGIPD1M1': [16, AGIPD_1MGeometry.from_quad_positions(quad_pos=[(-525, 625),
                                                                                   (-550, -10),
                                                                                   (520, -160),
                                                                                   (542.5, 475),
                                                                                  ])]
                  }

 try:
    nmods = agipd_instances[instance][0]
    geom =  agipd_instances[instance][1]
 except KeyError as ke:
    print('The provided AGIPD instance is not recognised. Available instances are: \
    AGIPD65K1,\nAGIPD500K2G,\nAGIPD1M1')

 print(f"Using {creation_time} as creation time\n")
 print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
      f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Integration time: {integration_time}\n"
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # ml - high gain slope
 # bl - high gain intercept
 # devl - absolute relative deviation from linearity for high gain
 # mh - medium gain slope
 # bh - medium gain intercept
 # oh, ch, ah - parameters of hook function fit to medium gain (only if requested)
 # devh - absolute relative deviation from linearity for linear part of medium gain

 keys = ['ml', 'bl', 'mh', 'bh', 'BadPixelsPC']
 keys_file = ["ml", "bl", "devl", "mh", "bh", "oh", "ch", "ah", "devh"]

 fit_data = {}
 bad_pixels = {}
 modules = []
 karabo_da = []
 constants_files = []

 out_folder = Path(out_folder)
 constants_files = sorted(out_folder.glob('*.h5'),
                         key=lambda f: (len(f.stem), f.stem))

 for f in constants_files:
    mod = int(f.stem.split("_")[-1])
    qm = module_index_to_qm(mod)
    fit_data[mod] = {}
    with h5py.File(f, 'r') as hf:
        bad_pixels[mod] = hf[f'/{qm}/BadPixelsPC/0/data'][()]
        for key in keys_file:
            fit_data[mod][key]= hf[f'/{qm}/{key}/0/data'][()]

    modules.append(mod)
    karabo_da.append(f"AGIPD{mod:02d}")
    print(f'Data available for AGIPD{mod:02d}')
 ```

 %% Cell type:code id: tags:

 ``` python
 def slope_dict_to_arr(d):
    key_to_index = {
                    "ml": 0,
                    "bl": 1,
                    "devl": 2,
                    "mh": 3,
                    "bh": 4,
                    "oh": 5,
                    "ch": 6,
                    "ah": 7,
                    "devh": 8,
                    "tresh": 9
    }

    arr = np.zeros([11]+list(d["ml"].shape), np.float32)
    for key, item in d.items():
        if key not in key_to_index:
            continue
        arr[key_to_index[key],...] = item

    return arr
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 condition = Conditions.Dark.AGIPD(memory_cells=mem_cells,
                                  bias_voltage=bias_voltage,
                                  acquisition_rate=acq_rate,
                                  gain_setting=gain_setting,
                                  integration_time=integration_time)

 db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesPC(),
                             condition, cal_db_interface,
                             snapshot_at=creation_time)
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))

 report = get_report(metadata_folder)
 ```

 %% Cell type:code id: tags:

 ``` python
 md = None

 if db_output:
    for mod, pdu in zip(modules, db_modules):
        for const in ["SlopesPC", "BadPixelsPC"]:

            dbconst = getattr(Constants.AGIPD, const)()

            if const == "SlopesPC":
                dbconst.data = slope_dict_to_arr(fit_data[mod])
            else:
                dbconst.data = bad_pixels[mod]


            md = send_to_db(pdu, karabo_id, dbconst, condition,
                            file_loc, report, cal_db_interface,
                            creation_time=creation_time,
                           variant=1)

    print("Constants injected with the following conditions:\n")
    print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
          f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
          f"• integration_time: {integration_time}\n"
          f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
 else:
    print('Injection to DB not requested.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Remove keys which won't be used for comparison plots and add BP to the rest of data
 for mod in modules:
    fit_data[mod]['BadPixelsPC'] = bad_pixels[mod]

    for key in keys_file:
        if key not in keys:
            del fit_data[mod][key]

    for key in keys:
        fit_data[mod][key] = fit_data[mod][key].swapaxes(1,2)
 ```

 %% Cell type:code id: tags:

 ``` python
 def retrieve_old_PC(mod):
    dconst = getattr(Constants.AGIPD, 'SlopesPC')()
-    old_PC = get_from_db(karabo_id=karabo_id,
+    old_PC, _ = get_from_db(karabo_id=karabo_id,
            karabo_da=karabo_da[mod],
            constant=dconst,
            condition=condition,
            empty_constant=None,
            cal_db_interface=cal_db_interface,
            creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
            strategy="pdu_prior_in_time",
            verbosity=1,
            timeout=cal_db_timeout)
    return old_PC

 with multiprocessing.Pool(processes=len(modules)) as pool:
    old_PC_consts = pool.map(retrieve_old_PC, range(len(modules)))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create the arrays that will be used for figures.
 # Each array correponds to the data for all processed modules.

 pixel_range = [0,0,512,128]
 const_data = {}
 old_const = {}
-const_order = [0, 1, 3, 4]
+const_order = [0, 1, 3, 4] # this holds the position of slopes and intercepts in PC const array.

 for (key, c) in zip(keys, const_order):
    const_data[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
    old_const[key] = np.full((nmods, mem_cells, 512, 128), np.nan)
    for cnt, mn in enumerate(modules):
        if key in fit_data[mn]:
            const_data[key][cnt,:,pixel_range[0]:pixel_range[2],
                               pixel_range[1]:pixel_range[3]] = fit_data[mn][key]
-            if np.any(old_PC_consts[0][0]):
+            if np.any(old_PC_consts[cnt]):
                old_const[key][cnt,:,pixel_range[0]:pixel_range[2],
-                               pixel_range[1]:pixel_range[3]] = old_PC_consts[cnt][0][c].swapaxes(1,2)
+                               pixel_range[1]:pixel_range[3]] = old_PC_consts[cnt][c].swapaxes(1,2)

 const_data['BadPixelsPC'] = np.full((nmods, mem_cells, 512, 128), np.nan)
 for cnt, mn in enumerate(modules):
    const_data['BadPixelsPC'][cnt,:,pixel_range[0]:pixel_range[2],
                              pixel_range[1]:pixel_range[3]] = fit_data[mn]['BadPixelsPC']
 ```

 %% Cell type:markdown id: tags:

 ## Summary across pixels ##

 %% Cell type:code id: tags:

 ``` python
 gain_data = {'HG': {},
             'MG': {},
             'BP': {}
            }

 old_gain_data = {'HG': {},
                 'MG': {}
                }

 for key in ['ml', 'bl']:
    gain_data['HG'][key] = const_data[key]
    old_gain_data['HG'][key] = old_const[key]
 for key in ['mh', 'bh']:
    gain_data['MG'][key] = const_data[key]
    old_gain_data['MG'][key] = old_const[key]
 gain_data['BP']['BadPixelsPC'] = const_data['BadPixelsPC']
 ```

 %% Cell type:code id: tags:

 ``` python
 def plot_definition(data, g, key):
    titel = ['Difference to previous', 'Percentage difference']
    gs = gridspec.GridSpec(1, 2)
    fig = plt.figure(figsize=(15, 7))
    plt.suptitle(f'{g}', fontsize=16)

    for pos in range(0,2):
        vmin, vmax = get_range(data[pos], 2)
        vmax = max(vmax, abs(vmin))
        ax = fig.add_subplot(gs[0, pos])
        ticks = [-1, 0, 1] if np.isnan(vmin) else [vmin, (vmin+vmax)/2, vmax]
        geom.plot_data_fast(data[pos],
                                vmin=vmin, vmax=vmax, ax=ax, cmap="RdBu", figsize=(13,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1,
                                         })
        colorbar = ax.images[0].colorbar
        colorbar.ax.set_yticklabels(["{:.1f}".format(tk) for tk in colorbar.get_ticks()])
        if pos == 1:
            colorbar.set_label('%')
        ax.set_title(f"{titel[pos]}: {key}", fontsize=14)
        ax.set_xlabel("Columns", fontsize=13)
        ax.set_ylabel("Rows", fontsize=13)

 def plot_diff_consts(old_const, new_const, g, ratio=False):
    if ratio:
        old_data = old_const['HG']['ml'] / old_const['MG']['mh']
        new_data = new_const['HG']['ml'] / new_const['MG']['mh']
        data1 = np.nanmean((new_data - old_data), axis=1)
        data2 = np.nanmean((new_data - old_data)/old_data*100, axis=1)
        data = [data1, data2]
        key ='Slopes ratio HG/MG'
        plot_definition(data, g, key)
    else:
        for i, key in enumerate(old_const[g].keys()):
            data1 = np.nanmean((new_const[g][key] - old_const[g][key]), axis=1)
            data2 = np.nanmean((new_const[g][key] - old_const[g][key])/old_const[g][key]*100, axis=1)
            data = [data1, data2]
            plot_definition(data, g, key)

 ```

 %% Cell type:code id: tags:

 ``` python
 for gain in old_gain_data.keys():
    plot_diff_consts(old_gain_data, gain_data, gain)
 ```

 %% Cell type:code id: tags:

 ``` python
 plot_diff_consts(old_gain_data, gain_data, 'Ratio HG/MG', ratio=True)
 ```

 %% Cell type:code id: tags:

 ``` python
 g_label = ['High gain', 'Medium gain', 'Bad pixels PC']
 for idx, g in enumerate(gain_data.keys()):
    gs = gridspec.GridSpec(1, 2)
    fig = plt.figure(figsize=(15, 7))
    plt.suptitle(f'{g_label[idx]}', fontsize=16)

    for i, key in enumerate(gain_data[g].keys()):
        if key is 'BadPixelsPC':
            data = np.nanmean(gain_data[g][key]>0, axis=1)
            vmin, vmax = (0,1)
            ax = fig.add_subplot(gs[0, :])
            ticks = [0, 0.5, 1]

        else:
            data = np.nanmean(gain_data[g][key], axis=1)
            vmin, vmax = get_range(data, 5)
            ax = fig.add_subplot(gs[0, i])
        geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(13,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1,

                                         })
        colorbar = ax.images[0].colorbar
        ax.set_title(key, fontsize=14)
        ax.set_xlabel('Columns', fontsize=13)
        ax.set_ylabel('Rows', fontsize=13)

    plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Summary across cells ##

 Good pixels only.

 %% Cell type:code id: tags:

 ``` python
 ratio = gain_data['HG']['ml'] / gain_data['MG']['mh']

 fig = plt.figure(figsize=(7, 7))
 ax = fig.add_subplot(111)
 data = np.nanmean(ratio, axis=1)
 vmin, vmax = get_range(data, 5)
 ax = geom.plot_data_fast(data,
                                vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(6,7),
                                colorbar={'shrink': 1,
                                          'pad': 0.04,
                                          'fraction': 0.1
                                         })
 colorbar = ax.images[0].colorbar
 colorbar.set_label('HG slope / MG slope', fontsize=13)
 ax.set_title('High/Medium Gain Slope Ratio', fontsize=14)
 ax.set_xlabel('Columns', fontsize=13)
 ax.set_ylabel('Rows', fontsize=13)

 plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 for idx, g in enumerate(gain_data.keys()):

    for key in gain_data[g].keys():
        data = np.copy(gain_data[g][key])
        if key=='BadPixelsPC':
            data = data>0
        else:
            data[gain_data['BP']['BadPixelsPC']>0] = np.nan

        d = []
-        for i in range(nmods):
+        for i, m in enumerate(modules):
            d.append({'x': np.arange(data[i].shape[0]),
                      'y': np.nanmean(data[i], axis=(1,2)),
                      'drawstyle': 'steps-pre',
-                      'label': f'{modules[i]}',
+                      'label': f'{m}',
                      'linewidth': 2,
                      'linestyle': '--' if i>7 else '-'
                      })

        fig = plt.figure(figsize=(12, 6))
        plt.suptitle(f'{g_label[idx]} - {key}', fontsize=16)
        ax = fig.add_subplot(111)

        _ = simplePlot(d, xrange=(-12, 510),
                            x_label='Memory Cell ID',
                            y_label=key,
                            use_axis=ax,
                            legend='top-left-frame-ncol8',)
        ylim = ax.get_ylim()
        ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
        ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 d = []
-for i in range(nmods):
+for i, m in enumerate(modules):
    d.append({'x': np.arange(ratio[i].shape[0]),
              'y': np.nanmedian(ratio[i], axis=(1,2)),
              'drawstyle': 'steps-pre',
-              'label': f'{modules[i]}',
+              'label': f'{m}',
              'linewidth': 2,
              'linestyle': '--' if i>7 else '-'
              })

 fig = plt.figure(figsize=(12, 6))
 plt.suptitle('High/Medium Gain Slope Ratio', fontsize=16)
 ax = fig.add_subplot(111)

 _ = simplePlot(d, xrange=(-12, 510),
                    x_label='Memory Cell ID',
                    y_label='Gain ratio ml/mh',
                    use_axis=ax,
                    legend='top-left-frame-ncol8',)
 ylim = ax.get_ylim()
 ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
 ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 table = []
 ratio_old = old_gain_data['HG']['ml'] / old_gain_data['MG']['mh']
 for cnt, mod in enumerate(modules):

    table.append((mod,
                  f"{np.nanmedian(ratio[cnt]):0.1f} +- {np.nanstd(ratio[cnt]):0.2f}",
                  f"{np.nanmedian(ratio_old[cnt]):0.1f} +- {np.nanstd(ratio_old[cnt]):0.2f}",
                  f"{np.nanmedian(gain_data['BP']['BadPixelsPC'][cnt]>0)*100:0.1f} ({np.nansum(gain_data['BP']['BadPixelsPC'][cnt]>0)})"
                ))

 all_HM = []
 all_HM_old = []
 for mod in range(len(modules)):
    all_HM.extend(ratio[mod])
    all_HM_old.extend(ratio_old[mod])
 all_HM = np.array(all_HM)
 all_HM_old = np.array(all_HM_old)

 all_MSK = np.array([list(msk) for msk in gain_data['BP']['BadPixelsPC']])

 table.append(('overall',
              f"{np.nanmean(all_HM):0.1f} +- {np.nanstd(all_HM):0.2f}",
              f"{np.nanmean(all_HM_old):0.1f} +- {np.nanstd(all_HM_old):0.2f}",
              f"{np.nanmean(all_MSK>0)*100:0.1f} ({np.nansum(all_MSK>0)})"
            ))

 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Module",
                                              "HG/MG Ratio",
                                              "Previous HG/MG Ratio",
                                              "Bad pixels [%(Count)]"])))
 ```

 %% Cell type:markdown id: tags:

 ## Summary high-medium gain ratio (good pixels only) + histograms

 %% Cell type:code id: tags:

 ``` python
 colors = cm.rainbow(np.linspace(0, 1, nmods))

 gs = gridspec.GridSpec(1, 2)
 fig = plt.figure(figsize=(17, 8))


 ratio[gain_data['BP']['BadPixelsPC'] > 0] = np.nan
 data = np.nanmean(ratio, axis=1)
 vmin, vmax = get_range(data, 5)
 ax = fig.add_subplot(gs[0, 0])
 geom.plot_data_fast(data,
                        vmin=vmin, vmax=vmax, ax=ax, cmap="jet", figsize=(12.5,7),
                        colorbar={'shrink': 1,
                                  'pad': 0.04,
                                  'fraction': 0.1
                                 })
 colorbar = ax.images[0].colorbar
 colorbar.set_label('HG/MG', fontsize=12)
 ax.set_xlabel('Columns', fontsize=12)
 ax.set_ylabel('Rows', fontsize=12)

 ax = fig.add_subplot(gs[0,1])
 for cnt, mod in enumerate(modules):
    h, e = np.histogram(ratio[cnt].flatten(), bins=100, range=(vmin, vmax))
    ax.plot(e[:-1], h, color=colors[cnt],linewidth=2, label=f'{mod}', alpha=0.8)
    ax.set_xlabel('High/Medium Gain Ratio', fontsize=13)
    ax.set_ylabel('Counts', fontsize=13)
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
 ax.grid()
 ax.legend()
 plt.show()
 ```

--- a/notebooks/AGIPD/playground/AGIPD-Gain_Distributions.ipynb
+++ b/notebooks/AGIPD/playground/AGIPD-Gain_Distributions.ipynb
--- a/notebooks/AGIPD/playground/AGIPD_SingleM_test_Dark.ipynb
+++ b/notebooks/AGIPD/playground/AGIPD_SingleM_test_Dark.ipynb
No results found