diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000000000000000000000000000000000000..03b9eafb7d1ac8151ee55f2849eb2ed7265b237c
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,10 @@
+# This file lists commits that git blame should ignore in order to produce
+# correct results. You can use either of these to see the 'correct' blame results:
+#
+# - `git blame file.py --ignore-revs-file .git-blame-ignore-revs`
+# - `git config blame.ignoreRevsFile .git-blame-ignore-revs`
+#
+# The second option is preferable as it applies to the whole repo all the time
+
+#  fix/pre-commit-whitespace - Whitespace fixes
+e7dfadaf4e189ef0e0f67798e8984695111257e3
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 687b779fc369965d8789ef9f3ebc1c56222cd1d0..c55f4dfec1e2e1ecb2c8f127edebecbc7550b049 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,12 +1,28 @@
-isort:
-  stage: test
+stages:
+  - check
+  - test
+
+checks:
+  stage: check
+  only: [merge_requests]
+  allow_failure: true
   script:
-    - python3 -m pip install --user isort==5.6.4
-    - isort --diff **/*.py && isort -c **/*.py
+    - export PATH=/home/gitlab-runner/.local/bin:$PATH
+    # We'd like to run the pre-commit hooks only on files that are being
+    # modified by this merge request, however
+    # `CI_MERGE_REQUEST_TARGET_BRANCH_SHA` is a 'premium' feature according to
+    # GitLab... so this is a workaround for extracting the hash
+    - export CI_MERGE_REQUEST_TARGET_BRANCH_SHA=$(git ls-remote origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME | cut -d$'\t' -f1)
+    - export FILES=$(git diff $CI_COMMIT_SHA  $CI_MERGE_REQUEST_TARGET_BRANCH_SHA --name-only | tr '\n' ' ')
+    - python3 -m pip install --user -r requirements.txt
+    - echo "Running pre-commit on diff from  $CI_COMMIT_SHA to $CI_MERGE_REQUEST_TARGET_BRANCH_SHA ($CI_MERGE_REQUEST_TARGET_BRANCH_NAME)"
+    #  Pass list of modified files to pre-commit so that it only checks them
+    - echo $FILES | xargs pre-commit run --color=always --files
 
 pytest:
   stage: test
+  only: [merge_requests]
   script:
     - python3 -m pip install --user -r requirements.txt
-    - python3 -m pip install --user pytest
+    - python3 -m pip install --user 'pytest>=5.4.0' pytest-asyncio testpath
     - pytest -vv tests/test_*
diff --git a/.gitlab/merge_request_templates/default.md b/.gitlab/merge_request_templates/default.md
new file mode 100644
index 0000000000000000000000000000000000000000..a1c7dce90b3e8c8b87de85022b9018d32a9ed4a8
--- /dev/null
+++ b/.gitlab/merge_request_templates/default.md
@@ -0,0 +1,31 @@
+<!--- Provide a general summary of your changes in the Title above.
+      Indicate detector & algorithm if applicable, e.g. [AGIPD] [DARK] add plotting -->
+
+## Description
+<!--- Why is this change required? What problem does it solve?
+      If it fixes an open issue, please link to the issue here. -->
+
+## How Has This Been Tested?
+<!--- Please describe in detail how you tested your changes.
+      Include details of your testing environment, tests run to see how
+      your change affects other areas of the code, etc. -->
+
+## Relevant Documents (optional)
+<!-- Include any relevant screenshots, elogs, or reports, if appropriate. -->
+
+## Types of changes
+<!--- What types of changes does your code introduce? Uncomment all lines that apply: -->
+
+<!-- - Bug fix (non-breaking change which fixes an issue) -->
+<!-- - New feature (non-breaking change which adds functionality) -->
+<!-- - Breaking change (fix or feature that would cause existing functionality to not work as expected) -->
+
+## Checklist:
+<!--- Go over all the following points, and uncomment all lines that apply: -->
+
+<!-- - My code follows the code style of this project. -->
+<!-- - My change requires a change to the documentation. -->
+<!-- - I have updated the documentation accordingly. -->
+
+## Reviewers
+<!--- Tag a minimum of two reviewers -->
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da0c4977b26c4052c8663682a6efc5a727435354
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,48 @@
+repos:
+  - repo: meta
+    hooks:
+    -   id: identity
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 0.3.6
+    hooks:
+    - id: nbqa-isort
+      additional_dependencies: [isort==5.6.4]
+      args: [--nbqa-mutate]
+    - id: nbqa-flake8
+      additional_dependencies: [flake8==3.8.4]
+      args: [--nbqa-mutate]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.3.9
+    hooks:
+      - id: nbstripout
+  - repo: https://github.com/pycqa/isort
+    rev: 5.6.4
+    hooks:
+      - id: isort
+  - repo: https://gitlab.com/pycqa/flake8
+    rev: 3.8.4
+    hooks:
+    - id: flake8
+      # If `CI_MERGE_REQUEST_TARGET_BRANCH_SHA` env var is set then this will
+      # run flake8 on the diff from the current commit to the latest commit of
+      # the branch being merged into, otherwise it will run flake8 as it would
+      # usually execute via the pre-commit hook
+      entry: bash -c 'if [ -z ${CI_MERGE_REQUEST_TARGET_BRANCH_SHA} ]; then (flake8 "$@"); else (git diff $CI_MERGE_REQUEST_TARGET_BRANCH_SHA | flake8 --diff); fi' --
+  - repo: https://github.com/myint/rstcheck
+    rev: 3f92957478422df87bd730abde66f089cc1ee19b  # commit where pre-commit support was added
+    hooks:
+    -   id: rstcheck
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    - id: check-added-large-files
+    - id: check-ast
+    - id: check-json
+    - id: check-yaml
+    - id: check-toml
+    - id: end-of-file-fixer
+    - id: trailing-whitespace
+    - id: check-docstring-first
+    - id: check-merge-conflict
+    - id: mixed-line-ending
+      args: [--fix=lf]
diff --git a/README.rst b/README.rst
index 6328ee6186d55220d620f7704c1967fbf829ef1a..4a9cbb379f4f12463b5170ce5061f9a1a420bd8b 100644
--- a/README.rst
+++ b/README.rst
@@ -1,146 +1,234 @@
+###################
 Offline Calibration
-===================
+###################
 
 The offline calibration is a package that consists of different services,
 responsible for applying most of the offline calibration and characterization
 for the detectors.
 
-Offline calibration installation
-================================
+.. contents::
 
-It's recommended to install the offline calibration (pycalibration) package
-over maxwell, using anaconda/3 environment.
 
-Installation using Anaconda
----------------------------
+Offline Calibration Installation
+********************************
 
-First you need to load the anaconda/3 environment through::
+It's recommended to install the offline calibration (pycalibration) package on
+Maxwell, using the anaconda/3 environment.
 
-    1. module load anaconda/3
 
-If installing into other python enviroments, this step can be skipped.
+Installation using python virtual environment - recommended
+===========================================================
 
-Then the package for the offline calibration can be obtained from the git repository::
+1. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
+2. ``module load anaconda/3`` - load the anaconda/3 environment
+3. ``python3 -m venv .venv`` - create the virtual environment
+4. ``source .venv/bin/activate`` - activate the virtual environment
+5. ``python3 -m pip install --upgrade pip`` - upgrade the version of pip
+6. ``python3 -m pip install -r requirements.txt`` - install dependencies
+7. ``python3 -m pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
+8. ``pip install "git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git#egg=XFELDetectorAnalysis&subdirectory=lib"`` - install the pyDetLib package
 
-    2. git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git
+Copy/paste script:
 
+.. code::
 
-You can then install all requirements of this tool chain in your home directory by running::
+  git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git
+  cd pycalibration
+  module load anaconda/3
+  python3 -m venv .venv
+  source .venv/bin/activate
+  python3 -m pip install --upgrade pip
+  python3 -m pip install -r requirements.txt
+  python3 -m pip install .  # `-e` flag for editable install
+  python3 -m pip install "git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git#egg=XFELDetectorAnalysis&subdirectory=lib/"
 
-    3. pip install -r requirements.txt . --user
 
-in pycalibration's root directory.
+Installation into user home directory
+=====================================
 
-After installation, you should make sure that the home directory is in the PATH environment variable::
+1. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
+2. ``module load anaconda/3`` - load the anaconda/3 environment. If installing into other python environments, this step can be skipped
+3. ``pip install -r requirements.txt`` - install all requirements of this tool chain in your home directory
+4. ``pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
+5. ``export PATH=$HOME/.local/bin:$PATH`` - make sure that the home directory is in the PATH environment variable
 
-    4. export PATH=$HOME/.local/bin:$PATH
+Copy/paste script:
 
-Installation using virtual python environment
----------------------------------------------
+.. code::
 
-Create virtual environment::
+  git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git
+  cd pycalibration
+  module load anaconda/3
+  pip install -r requirements.txt --user
+  pip install .  # `-e` flag for editable install, e.g. `pip install -e .`
+  export PATH=$HOME/.local/bin:$PATH
 
-    module load anaconda/3
-    python -m venv /path/to/new/virtual/environment
-    source /path/to/new/virtual/environment/bin/activate
 
-Clone from git::
+Creating an ipython kernel for virtual environments
+===================================================
 
-    cd /path/to/packages
-    git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git
-    cd pycalibration
+To create an ipython kernel with pycalibration available, first activate the
+virtual environment (if you are using one), and then run:
 
-Install the package::
+.. code::
 
-    pip install -r requirements.txt
+  python3 -m pip install ipykernel  # If not using a venv add `--user` flag
+  python3 -m ipykernel install --user --name pycalibration --display-name "pycalibration"  # If not using a venv pick different name
 
-In additional install pyDetLib package, which is required for many notebooks::
+This can be useful for Jupyter notebook tools such as https://max-jhub.desy.de/hub/login
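+
+To verify that the kernel was registered, or to remove it again later, the
+standard jupyter commands can be used (a minimal sketch, assuming the kernel
+name chosen above):
+
+.. code::
+
+  jupyter kernelspec list                  # should show a 'pycalibration' entry
+  jupyter kernelspec remove pycalibration  # remove the kernel again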
 
-    cd /path/to/packages
-    git clone https://git.xfel.eu/gitlab/karaboDevices/pyDetLib.git
-    cd pyDetLib/lib
-    pip install -r requirements.txt
-    pip install .
 
-++++++++++++++++++++++++++++++++++++++++++++++++++
-Setting an ipython kernel for virtual environments
-++++++++++++++++++++++++++++++++++++++++++++++++++
+Contributing
+************
 
-To set a kernel for your virtual environment::
+Guidelines
+==========
 
-    source /path/to/new/virtual/environment/bin/activate
-    pip install ipykernel
-    python -m ipykernel install --user --name <virtenv-name> --display-name "virtenv-display-name"
+Development guidelines can be found on the GitLab Wiki page here: https://git.xfel.eu/gitlab/detectors/pycalibration/wikis/GitLab-Guidelines
 
-This can be useful for Jupyter notebook tools as "max-jhub.desy.de".
+Basics
+======
 
-Development Installation
+The installation instructions above assume that you have set up SSH keys for use
+with GitLab to allow for passwordless clones. This makes it possible to run
+``pip install git+ssh...`` commands and install packages directly from GitLab.
+
+To do this, check the settings page here: https://git.xfel.eu/gitlab/profile/keys
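+
+If you do not have an SSH key pair yet, a minimal sketch for creating one (the
+email address is a placeholder) is:
+
+.. code::
+
+  ssh-keygen -t ed25519 -C "you@example.com"  # generate a new key pair
+  cat ~/.ssh/id_ed25519.pub  # paste this public key into the GitLab settings page above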
+
+Pre-Commit Hooks
+================
+
+This repository uses pre-commit hooks to automatically run some code quality and
+standard checks, which include the following:
+
+a. ``identity`` - The 'identity' meta hook prints a list of the files that the hooks will run on
+b. 'Standard' file checks
+
+   1. ``check-added-large-files`` - Ensures no large files are committed to the repo
+   2. ``check-ast`` - Checks that the python AST is parseable
+   3. ``check-json`` - Checks that json files are parseable
+   4. ``check-yaml`` - Checks that yaml files are parseable
+   5. ``check-toml`` - Checks that toml files are parseable
+   6. ``rstcheck`` - Checks that rst files are parseable
+   7. ``end-of-file-fixer`` - Ensures files end with a single newline
+   8. ``trailing-whitespace`` - Removes trailing whitespace from lines
+   9. ``check-merge-conflict`` - Checks no merge conflicts remain in the commit
+   10. ``mixed-line-ending`` - Fixes mixed line endings
+
+c. Code checks
+
+   1. ``flake8`` - Code style checks
+   2. ``isort`` - Sorts imports in python files
+   3. ``check-docstring-first`` - Ensures docstrings are in the correct place
+
+d. Notebook checks
+
+   1. ``nbqa-flake8`` - Runs flake8 on notebook cells
+   2. ``nbqa-isort`` - Runs isort on notebook cells
+   3. ``nbstripout`` - Strips output from ipynb files
+
+To install these checks, set up your environment as described above and then run
+the command:
+
+.. code::
+
+  pre-commit install
+
+This will set up the hooks in git locally, so that each time you run
+``git commit`` the hooks are executed on the **staged files only**. Beware that
+if the pre-commit hooks find required changes, some of them will **modify your
+files**; however, they only modify the current working files, not the ones you
+have already staged. This means that you can look at the diff between your
+staged files and the modified working files to see what changes are suggested.
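+
+For example (``somefile.py`` is a placeholder for one of your staged files):
+
+.. code::
+
+  git add somefile.py
+  git commit -m "Some change"  # hooks run on the staged copy of somefile.py
+  git diff somefile.py         # shows any fixes the hooks applied to the working copy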
+
+
+Run Checks Only On Diffs
 ------------------------
 
-For a development installation, which automatically
-picks up (most) changes, first install the dependencies as above,
-but then install the tool-chain separately in development mode (install in home directory using --user, in case of using Anaconda/3)::
+Typically ``pre-commit`` is run with ``--all-files`` within a CI. However, as it
+is being set up on an existing codebase, these checks would always fail with a
+substantial number of issues. Using some creative workarounds, the CI has been
+set up to only run on files which have changed between a merge request and the
+target branch.
+
+If you want to run the pre-commit checks as they would run on the CI, you can
+use the ``bin/pre-commit-diff.sh`` script to execute the same checks locally.
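+
+The script takes the target branch name as an optional first argument,
+defaulting to ``master``. For example (``my-target-branch`` is a placeholder):
+
+.. code::
+
+  bin/pre-commit-diff.sh                   # compare against master
+  bin/pre-commit-diff.sh my-target-branch  # compare against another target branch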
+
+A side effect of checking only the diff is that the checks will run on **all**
+of the differences between the 'local' and target branch. This means that if
+changes have recently been merged into the target branch, and there is
+divergence between the two, then the checks will also run on those unrelated
+differences.
 
-   pip install -e .
+If this happens and the hooks in the CI (or via the script) run on the wrong
+files, you should **rebase onto the target branch** to prevent the checks from
+running on the wrong files/diffs.
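+
+A minimal sketch of such a rebase, assuming the target branch is ``master`` on
+the ``origin`` remote:
+
+.. code::
+
+  git fetch origin
+  git rebase origin/master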
 
 
-Activate Offline calibration
-============================
+Skipping Checks
+---------------
 
-For using pycalibration package one needs to activate it through::
+If the checks are failing and you want to ignore them on purpose, you have two options:
 
-    source activate
+- use the ``--no-verify`` flag on your ``git commit`` command to skip them, e.g. ``git commit -m "Commit skipping hooks" --no-verify``
+- use the variable ``SKIP=hooks,to,skip`` before the git commit command to list hooks to skip, e.g. ``SKIP=flake8,isort git commit -m "Commit skipping only flake8 and isort hooks"``
 
-from inside of the pycalibration directory. This will automatically load 
-all needed modules and export the $PATH for the home directory.
+In the CI pipeline the pre-commit check stage has ``allow_failure: true`` set so
+that it is possible to ignore errors in the checks, and so that subsequent
+stages will still run even if the checks have failed. However, there should be a
+good reason for allowing the checks to fail, e.g. the checks flagging issues in
+unmodified sections of code.
 
 
 Python Scripted Calibration
-===========================
+***************************
 
-First: do not run this on the Maxwell gateway. Rather, `salloc`
-a node for yourself first::
+**Do not run this on the Maxwell gateway**. Rather, ``salloc`` a node for
+yourself first:
 
-   salloc -p exfel/upex -t 01:00:00
+.. code::
 
-where `-p` gives the partition to use: exfel or upex and `-t`
-the duration the node should be allocated. Then `ssh` onto 
-that node.
+  salloc -p exfel/upex -t 01:00:00
 
-(optionally) Set up the environment::
+where `-p` gives the partition to use (exfel or upex) and `-t` the duration for
+which the node should be allocated. Then `ssh` onto that node.
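+
+For example (the node name below is illustrative; use the one reported by
+``salloc``/``squeue``):
+
+.. code::
+
+  squeue -u $USER  # shows the node allocated to your job
+  ssh max-exfl123  # connect to that node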
 
-   module load python3
-   pip install --user ipython --upgrade
-   pip install --user ipyparallel --upgrade
-   pip install --user dill
-   
-If running headless (i.e. without X forwarding), be sure to set 
-`MPLBACKEND=Agg`, via::
+Then activate your environment as described above (or just continue if you are
+not using a venv).
 
-   export MPLBACKEND=Agg
+If running headless (i.e. without X forwarding), be sure to set
+``MPLBACKEND=Agg``, via:
 
-Then start an `ipcluster`. If you followed the steps above this can be done
-via::
+.. code::
 
-   ~/.local/bin/ipcluster start --n=32
+  export MPLBACKEND=Agg
 
+Then start an ``ipcluster``. If you followed the steps above this can be done
+via:
 
-Run the script::
+.. code::
+
+  ipcluster start --n=32
+
+
+Finally run the script:
+
+.. code::
 
     python3 calibrate.py --input /gpfs/exfel/exp/SPB/201701/p002012/raw/r0100 \
-       --output ../../test_out --mem-cells 30 --detector AGIPD --sequences 0,1   
+      --output ../../test_out --mem-cells 30 --detector AGIPD --sequences 0,1
 
 Here `--input` should point to a directory of `RAW` files for the detector you
-are calibrating. They will be output into the folder specified by `--output`, 
-which will have the run number or the last folder in the hiearchy of the input
-appended. Additionally, you need to specify the number of `--mem-cells` used
-for the run, as well as the `--detector`. Finally, you can optionally 
-specify to only process certain `--sequences` of files, matching the sequence
-numbers of the `RAW` input. These should be given as a comma-separated list.
-
-Finally, there is a `--no-relgain` option, which disables relative gain 
+are calibrating. They will be output into the folder specified by `--output`,
+which will have the run number or the last folder in the hierarchy of the input
+appended. Additionally, you need to specify the number of `--mem-cells` used for
+the run, as well as the `--detector`. Finally, you can optionally specify to
+only process certain `--sequences` of files, matching the sequence numbers of
+the `RAW` input. These should be given as a comma-separated list.
+
+Finally, there is a `--no-relgain` option, which disables relative gain
 correction. This can be useful while we still further characterize the detectors
-to provid accurate relative gain correction constants.
+to provide accurate relative gain correction constants.
 
 You'll get a series of plots in the output directory as well.
-
diff --git a/bin/pre-commit-diff.sh b/bin/pre-commit-diff.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9219bf916408d3f195c8c7cb783b687881de9a52
--- /dev/null
+++ b/bin/pre-commit-diff.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# We'd like to run the pre-commit hooks only on files that are being modified by
+# this merge request, however `CI_MERGE_REQUEST_TARGET_BRANCH_SHA` is a 'premium'
+# feature according to GitLab... so this is a workaround for extracting the hash
+CI_MERGE_REQUEST_TARGET_BRANCH_NAME="${1:-master}"  # Set to master or 1st input
+CI_COMMIT_SHA=$(git rev-parse HEAD)
+export CI_MERGE_REQUEST_TARGET_BRANCH_SHA=$(git ls-remote origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME | cut -d$'\t' -f1)
+FILES=$(git diff $CI_COMMIT_SHA  $CI_MERGE_REQUEST_TARGET_BRANCH_SHA --name-only | tr '\n' ' ')
+echo "Running pre-commit on diff from  $CI_COMMIT_SHA to $CI_MERGE_REQUEST_TARGET_BRANCH_SHA ($CI_MERGE_REQUEST_TARGET_BRANCH_NAME)"
+#  Pass list of modified files to pre-commit so that it only checks them
+echo $FILES | xargs pre-commit run --color=always --files
diff --git a/cal_tools/cal_tools/agipdlib.py b/cal_tools/cal_tools/agipdlib.py
index 9b40845e024ba0655bb835fac0b1e31be02016d1..212e5c64eb6231f042baf157281fff6db76e220a 100644
--- a/cal_tools/cal_tools/agipdlib.py
+++ b/cal_tools/cal_tools/agipdlib.py
@@ -279,17 +279,17 @@ class AgipdCorrections:
             f = h5py.File(file_name, 'r')
             group = f[agipd_base]["image"]
 
-            (_, first_index, last_index, 
+            (_, first_index, last_index,
              _, valid_indices) = self.get_valid_image_idx(idx_base, f)
 
             allcells = np.squeeze(group['cellId'])
             allpulses = np.squeeze(group['pulseId'])
-         
+
             firange = self.gen_valid_range(first_index, last_index,
                                            self.max_cells, allcells,
                                            allpulses, valid_indices,
                                            apply_sel_pulses)
-                                           
+
             n_img = firange.shape[0]
             data_dict['nImg'][0] = n_img
             if np.all(np.diff(firange) == 1):
@@ -382,7 +382,7 @@ class AgipdCorrections:
         Both corrections are iterative and requires 4 iterations.
 
         Correction is performed in chunks of (e.g. 512 images).
-        A complete array of data from one file 
+        A complete array of data from one file
         (256 trains, 352 cells) will take
         256 * 352 * 128 * 512 * 4 // 1024**3 = 22 Gb in memory
 
@@ -484,7 +484,7 @@ class AgipdCorrections:
             self.shared_dict[i_proc]['t0_rgain'][first:last] = \
                 rawgain / t0[cellid, ...]
             self.shared_dict[i_proc]['raw_data'][first:last] = np.copy(data)
-    
+
         # Often most pixels are in high-gain, so it's more efficient to
         # set the whole output block to zero than select the right pixels.
         gain[:] = 0
@@ -514,7 +514,7 @@ class AgipdCorrections:
 
     def baseline_correction(self, i_proc:int, first:int, last:int):
         """
-        Perform image-wise base-line shift correction for 
+        Perform image-wise base-line shift correction for
         data in shared memory via histogram or stripe
 
         :param first: Index of the first image to be corrected
@@ -635,9 +635,9 @@ class AgipdCorrections:
         # not just set to 0
         if self.corr_bools.get('blc_set_min'):
             data[(data < 0) & (gain == 1)] = 0
-            
+
         # Do xray correction if requested
-        # The slopes we have in our constants are already relative 
+        # The slopes we have in our constants are already relative
         # slopeFF = slopeFFpix/avarege(slopeFFpix)
         # To apply them we have to / not *
         if self.corr_bools.get("xray_corr"):
@@ -746,7 +746,7 @@ class AgipdCorrections:
         :param i_proc: the index of sharedmem for a given file/module
         :return n_img: number of images to correct
         """
-        
+
         data_dict = self.shared_dict[i_proc]
         n_img = data_dict['nImg'][0]
 
@@ -814,12 +814,12 @@ class AgipdCorrections:
 
         return first_pulse, last_pulse, pulse_step
 
-    def choose_selected_pulses(self, allpulses: np.array, 
+    def choose_selected_pulses(self, allpulses: np.array,
                                can_calibrate: np.array) -> np.array:
 
         """
         Choose given selected pulse from pulseId array of
-        raw data. The selected pulses range is validated then 
+        raw data. The selected pulses range is validated then
         used to add a booleans in can_calibrate and guide the
         later appliance.
 
@@ -830,7 +830,7 @@ class AgipdCorrections:
                                selected pulses
         """
 
-        (first_pulse, last_pulse, 
+        (first_pulse, last_pulse,
         pulse_step) = self.validate_selected_pulses(allpulses)
 
         # collect the pulses to be calibrated
@@ -853,9 +853,9 @@ class AgipdCorrections:
         return can_calibrate
 
     def gen_valid_range(self, first_index: int, last_index: int,
-                        max_cells: int, allcells: np.array, allpulses: np.array, 
+                        max_cells: int, allcells: np.array, allpulses: np.array,
                         valid_indices: Optional[np.array] = None,
-                        apply_sel_pulses: Optional[bool] = True 
+                        apply_sel_pulses: Optional[bool] = True
                         ) -> np.array:
         """ Validate the arrays of image.cellId and image.pulseId
         to check presence of data and to avoid empty trains.
@@ -871,7 +871,7 @@ class AgipdCorrections:
         :param valid_indices: validated indices of image.data
         :param apply_sel_pulses: A flag for applying selected pulses
                                  after validation for correction
-        :return firange: An array of validated image.data 
+        :return firange: An array of validated image.data
                          indices to correct
         # TODO: Ignore rows (32 pulse) of empty pulses even if
         common-mode is selected
@@ -1067,7 +1067,7 @@ class AgipdCorrections:
             rel_low gain = _rel_medium gain * 4.48
 
         :param cons_data: A dictionary for each retrieved constant value.
-        :param when: A dictionary for the creation time 
+        :param when: A dictionary for the creation time
                      of each retrieved constant.
         :param module_idx: A module_idx index
         :return:
@@ -1089,7 +1089,7 @@ class AgipdCorrections:
                 bpixels |= cons_data["BadPixelsFF"].astype(np.uint32)[...,
                                                                       :bpixels.shape[2],  # noqa
                                                                       None]
-            
+
             if when["SlopesFF"]: # Checking if constant was retrieved
 
                 slopesFF = cons_data["SlopesFF"]
@@ -1150,16 +1150,16 @@ class AgipdCorrections:
                 pc_med_m = slopesPC[..., :self.max_cells, 3]
                 pc_med_l = slopesPC[..., :self.max_cells, 4]
 
-                # calculate median for slopes  
+                # calculate median for slopes
                 pc_high_med = np.nanmedian(pc_high_m, axis=(0,1))
                 pc_med_med = np.nanmedian(pc_med_m, axis=(0,1))
                 # calculate median for intercepts:
                 pc_high_l_med = np.nanmedian(pc_high_l, axis=(0,1))
                 pc_med_l_med = np.nanmedian(pc_med_l, axis=(0,1))
-                
-                # sanitize PC data 
+
+                # sanitize PC data
                 # (it should be done already on the level of constants)
-                # In the following loop, 
+                # In the following loop,
                 # replace `nan`s across memory cells with
                 # the median value calculated previously.
                 # Then, values outside of the valid range (0.8 and 1.2)
diff --git a/cal_tools/cal_tools/agipdutils.py b/cal_tools/cal_tools/agipdutils.py
index 9533a65717676bef660795007a090214121847a5..85bc81bb3d8e60e1104da6b9e792fd0aea464b3c 100644
--- a/cal_tools/cal_tools/agipdutils.py
+++ b/cal_tools/cal_tools/agipdutils.py
@@ -1,8 +1,9 @@
 import copy
+from typing import Tuple
 
 import numpy as np
 from cal_tools.enums import BadPixels, SnowResolution
-from scipy.signal import cwt, find_peaks_cwt, ricker
+from scipy.signal import cwt, ricker
 from sklearn.mixture import GaussianMixture
 from sklearn.preprocessing import StandardScaler
 
@@ -175,11 +176,11 @@ def baseline_correct_via_noise(d, noise, g, threshold):
     the shift corrected data is returned.
 
     """
-    
+
     seln = (g == 0) & (d <= 50)
     h, e = np.histogram(d[seln], bins=210, range=(-2000, 100))
     c = (e[1:] + e[:-1]) / 2
-    
+
     try:
         cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
     except:
@@ -249,8 +250,10 @@ def correct_baseline_via_hist(d, pcm, g):
             return d, 0
         it += 1
 
-    def min_hist_distance(pc, bins=100, ran=(-10000, 10000), dec=20,
-                          minbin=10):
+    def min_hist_distance(pc: int,
+                          bins: int = 100,
+                          ran: Tuple[int, int] = (-10000, 10000),
+                          minbin: int = 10) -> float:
         hh, e = np.histogram(dd[g == 0] - pc, bins=bins, range=ran)
         hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=bins,
                              range=ran)
diff --git a/cal_tools/cal_tools/enums.py b/cal_tools/cal_tools/enums.py
index a516dee2f55dbd8ece632c00f523d58a80b2f578..19420caeacf38580e808ee0da4a76c508e29e464 100644
--- a/cal_tools/cal_tools/enums.py
+++ b/cal_tools/cal_tools/enums.py
@@ -4,7 +4,7 @@ from enum import Enum
 class BadPixels(Enum):
     """ The European XFEL Bad Pixel Encoding
     """
-        
+
     OFFSET_OUT_OF_THRESHOLD  = 0b000000000000000000001 # bit 1
     NOISE_OUT_OF_THRESHOLD   = 0b000000000000000000010 # bit 2
     OFFSET_NOISE_EVAL_ERROR  = 0b000000000000000000100 # bit 3
@@ -26,12 +26,12 @@ class BadPixels(Enum):
     OVERSCAN                 = 0b001000000000000000000 # bit 19
     NON_SENSITIVE            = 0b010000000000000000000 # bit 20
     NON_LIN_RESPONSE_REGION  = 0b100000000000000000000 # bit 21
-    
-    
+
+
 class BadPixelsFF(Enum):
     """ The SLopesFF Bad Pixel Encoding
     """
-        
+
     FIT_FAILED               = 0b000000000000000000001 # bit 1
     CHI2_THRESHOLD           = 0b000000000000000000010 # bit 2
     NOISE_PEAK_THRESHOLD     = 0b000000000000000000100 # bit 3
@@ -41,11 +41,10 @@ class BadPixelsFF(Enum):
     BAD_DARK                 = 0b000000000000001000000 # bit 6
     NO_ENTRY                 = 0b000000000000010000000 # bit 7
     GAIN_DEVIATION           = 0b000000000000100000000 # bit 8
-    
-    
+
+
 class SnowResolution(Enum):
     """ An Enum specifying how to resolve snowy pixels
     """
     NONE = "none"
     INTERPOLATE = "interpolate"
-
diff --git a/cal_tools/cal_tools/lpdlib.py b/cal_tools/cal_tools/lpdlib.py
index ea78384d4736f866cbed7043aa78669659d6d843..716637a5261cbfddeb752758290644c0c1707c0c 100644
--- a/cal_tools/cal_tools/lpdlib.py
+++ b/cal_tools/cal_tools/lpdlib.py
@@ -68,7 +68,7 @@ class LpdCorrections:
             index section
         :param do_ff: perform flat field corrections
         :param correct_non_linear: perform non-linear transition region corr.
-        :param karabo_data_mode: set to true to use data iterated with karabo 
+        :param karabo_data_mode: set to true to use data iterated with karabo
             data
         """
         self.lpd_base = h5_data_path.format(channel)
@@ -261,19 +261,19 @@ class LpdCorrections:
 
         # correct offset
         im -= og
-        
+
         nlf = 0
         if self.mark_nonlin and self.linear_between is not None:
             for gl, lr in enumerate(self.linear_between):
-                
+
                 midx = (gain == gl) & ((im < lr[0]) | (im > lr[1]))
                 msk[midx] = BadPixels.NON_LIN_RESPONSE_REGION.value
                 numnonlin = np.count_nonzero(midx, axis=(1,2))
                 nlf += numnonlin
             nlf = nlf/float(im.shape[0] * im.shape[1])
-            
+
         # hacky way of smoothening transition region between med and low
-        
+
         cfac = 1
         if self.nlc_version == 1 and self.cnl:
             cfac = 0.314 * np.exp(-im * 0.001)
@@ -310,7 +310,7 @@ class LpdCorrections:
             cf = lin_exp_fun(x, cnl['m'], cnl['b'], cnl['A'], cnl['lam'],
                              cnl['c'])
             im[(gain == 2)] -= np.minimum(cf, 0.45) * x
-        
+
         # create bad pixels masks, here non-finite values
         bidx = ~np.isfinite(im)
         im[bidx] = 0
@@ -547,7 +547,7 @@ class LpdCorrections:
                                     dtype=np.uint16, fletcher32=True)
         self.outfile.create_dataset(lpdbase + "image/length", fsz,
                                     dtype=np.uint32, fletcher32=True)
-        
+
         if self.mark_nonlin:
             self.outfile.create_dataset(lpdbase + "image/nonLinear", fsz,
                                         dtype=np.float32, fletcher32=True)
@@ -590,9 +590,9 @@ class LpdCorrections:
               connect to
             * tcp://host:port_low#port_high to specify a port range from
               which a random port will be picked. E.g. specifying
-              
+
               tcp://max-exfl016:8015#8025
-              
+
               will randomly pick an address in the range max-exfl016:8015 and
               max-exfl016:8025.
 
diff --git a/cal_tools/cal_tools/metrology.py b/cal_tools/cal_tools/metrology.py
index 6c7e807d9051b4078412ccf5d767f24e1cd5d14e..6e94ad85c086a7506e88a4e6c8146b52a22c2092 100644
--- a/cal_tools/cal_tools/metrology.py
+++ b/cal_tools/cal_tools/metrology.py
@@ -7,28 +7,28 @@ from matplotlib import pylab as plt
 
 
 def getModulePosition(metrologyFile, moduleId):
-    """Position (in mm) of a module relative to the top left 
+    """Position (in mm) of a module relative to the top left
     corner of its quadrant. In case of tile-level positions,
-    the the position refers to the center of the top left 
+    the the position refers to the center of the top left
     pixel.
-    
+
     Args
     ----
-    
+
     metrologyFile : str
         Fully qualified path and filename of the metrology file
     moduleId : str
         Identifier of the module in question (e.g. 'Q1M2T03')
-        
+
     Returns
     -------
-    
-    ndarray: 
+
+    ndarray:
         (x, y)-Position of the module in its quadrant
-    
+
     Raises
     ------
-    
+
     ValueError: In case the moduleId contains invalid module
         identifieres
     """
@@ -38,11 +38,11 @@ def getModulePosition(metrologyFile, moduleId):
     #
     #   QXMYTZZ
     #
-    # where X, Y, and Z are digits. Q denotes the quadrant 
-    # (X = 1, ..., 4), M the supermodule (Y = 1, ..., 4) and T 
+    # where X, Y, and Z are digits. Q denotes the quadrant
+    # (X = 1, ..., 4), M the supermodule (Y = 1, ..., 4) and T
     # the tile (Z = 1, ..., 16; with leading zeros).
     modulePattern = re.compile(r'[QMT]\d+')
-    # Give the module identifier Q1M1T01, the moduleList splits this 
+    # Give the module identifier Q1M1T01, the moduleList splits this
     # into the associated quadrant, supermodule, and tile identifiers:
     # >>> print(moduleList)
     # ['Q1', 'M1', 'T01']
@@ -53,7 +53,7 @@ def getModulePosition(metrologyFile, moduleId):
     # >>> print(h5Keys)
     # ['Q1', 'Q1/M1', 'Q1/M1/T01']
     h5Keys = ['/'.join(moduleList[:idx+1]) for idx in range(len(moduleList))]
-    
+
     # Every module of the detector gives its position relative to
     # the top left corner of its parent structure. Every position
     # is stored in the positions array
@@ -83,17 +83,17 @@ def getModulePosition(metrologyFile, moduleId):
 def translateToModuleBL(tilePositions):
     """Tile coordinates within a supermodule with the
     origin in the bottom left corner.
-    
+
     Parameters
     ----------
-    
+
     tilePositions : ndarray
-        Tile positions as retrieved from the LPD metrology 
+        Tile positions as retrieved from the LPD metrology
         file. Must have shape (16, 2)
-        
+
     Returns
     -------
-    
+
     ndarray
         Tile positions relative to the bottom left corner.
     """
@@ -115,7 +115,7 @@ def translateToModuleBL(tilePositions):
     # In the clockwise order of LPD tiles, the 8th
     # tile in the list is the bottom left tile
     bottomLeft8th = np.asarray([0., moduleCoords[8][1]])
-    # Translate coordinates to the bottom left corner 
+    # Translate coordinates to the bottom left corner
     # of the bottom left tile
     bottomLeft = moduleCoords - bottomLeft8th
     return bottomLeft
@@ -124,44 +124,44 @@ def translateToModuleBL(tilePositions):
 def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6000.):
     """Plots data of a supermodule with tile positions
     determined by the metrology data.
-    
+
     Parameters
     ----------
-    
+
     tileData : ndarray
-        Supermodule image data separated in individual tiles. 
+        Supermodule image data separated in individual tiles.
         Must have shape (16, 32, 128).
-        
-    metrologyPositions : ndarray 
-        Tile positions as retrieved from the metrology file. 
+
+    metrologyPositions : ndarray
+        Tile positions as retrieved from the metrology file.
         Must have shape (16, 2)
-        
+
     zoom : float, optional
         Can enlarge or decrease the size of the plot. Default = 1.
-        
+
     vmin, vmax : float, optional
         Value range. Default vmin=100., vmax=6000.
-        
+
     Returns
     -------
     matplotlib.Figure
-        Figure object containing the supermodule image        
+        Figure object containing the supermodule image
     """
     # Data needs to have 16 tiles, each with
     # 32x128 pixels
     assert tileData.shape == (16, 32, 128)
-    
+
     # Conversion coefficient, required since
     # matplotlib does its business in inches
     mmToInch = 1./25.4 # inch/mm
-    
+
     # Some constants
     numberOfTiles = 16
     numberOfRows = 8
     numberOfCols = 2
     tileWidth = 65.7 # in mm
     tileHeight = 17.7 # in mm
-    
+
     # Base width and height are given by spatial
     # extend of the modules. The constants 3.4 and 1
     # are estimated as a best guess for gaps between
@@ -169,26 +169,26 @@ def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6
     figureWidth = zoom * numberOfCols*(tileWidth + 3.4)*mmToInch
     figureHeight = zoom * numberOfRows*(tileHeight + 1.)*mmToInch
     fig = plt.figure(figsize=(figureWidth, figureHeight))
-    
-    # The metrology file references module positions 
+
+    # The metrology file references module positions
     bottomRightCornerCoordinates = translateToModuleBL(metrologyPositions)
-    
+
     # The offset here accounts for the fact that there
     # might be negative x,y values
     offset = np.asarray(
-        [min(bottomRightCornerCoordinates[:, 0]), 
+        [min(bottomRightCornerCoordinates[:, 0]),
          min(bottomRightCornerCoordinates[:, 1])]
     )
-    
+
     # Account for blank borders in the plot
     borderLeft = 0.5 * mmToInch
     borderBottom = 0.5 * mmToInch
-    
+
     # The height and width of the plot remain
     # constant for a given supermodule
     width = zoom * 65.7 * mmToInch / (figureWidth - 2.*borderLeft)
     height = zoom * 17.7 * mmToInch / (figureHeight - 2.*borderBottom)
-    
+
     for i in range(numberOfTiles):
         # This is the top left corner of the tile with
         # respect to the top left corner of the supermodule
@@ -200,38 +200,38 @@ def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6
         ax = fig.add_axes((ax0, ay0, width, height), frameon=False)
         # Do not display axes, tick markers or labels
         ax.tick_params(
-            axis='both', left='off', top='off', right='off', bottom='off', 
+            axis='both', left='off', top='off', right='off', bottom='off',
             labelleft='off', labeltop='off', labelright='off', labelbottom='off'
         )
         # Plot the image. If one wanted to have a colorbar
         # the img object would be needed to produce one
         img = ax.imshow(
-            tileData[i], 
-            interpolation='nearest', 
+            tileData[i],
+            interpolation='nearest',
             vmin=vmin, vmax=vmax
         )
-        
+
     return fig
 
 
 def splitChannelDataIntoTiles(channelData, clockwiseOrder=False):
     """Splits the raw channel data into indiviual tiles
-    
+
     Args
     ----
-    
+
     channelData : ndarray
         Raw channel data. Must have shape (256, 256)
-        
+
     clockwiseOrder : bool, optional
         If set to True, the sequence of tiles is given
         in the clockwise order starting with the top
         right tile (LPD standard). If set to false, tile
         data is returned in reading order
-        
+
     Returns
     -------
-    
+
     ndarray
         Same data, but reshaped into (12, 32, 128)
     """
@@ -240,8 +240,8 @@ def splitChannelDataIntoTiles(channelData, clockwiseOrder=False):
     orderedTiles = tiles.reshape(16, 32, 128)
     if clockwiseOrder:
         # Naturally, the tile data after splitting is in reading
-        # order (i.e. top left tile is first, top right tile is second, 
-        # etc.). The official LPD tile order however is clockwise, 
+        # order (i.e. top left tile is first, top right tile is second,
+        # etc.). The official LPD tile order however is clockwise,
         # starting with the top right tile. The following array
         # contains indices of tiles in reading order as they would
         # be iterated in clockwise order (starting from the top right)
@@ -253,22 +253,22 @@ def splitChannelDataIntoTiles(channelData, clockwiseOrder=False):
 
 def splitChannelDataIntoTiles2(channelData, clockwiseOrder=False):
     """Splits the raw channel data into indiviual tiles
-    
+
     Args
     ----
-    
+
     channelData : ndarray
         Raw channel data. Must have shape (256, 256)
-        
+
     clockwiseOrder : bool, optional
         If set to True, the sequence of tiles is given
         in the clockwise order starting with the top
         right tile (LPD standard). If set to false, tile
         data is returned in reading order
-        
+
     Returns
     -------
-    
+
     ndarray
         Same data, but reshaped into (12, 32, 128)
     """
@@ -277,8 +277,8 @@ def splitChannelDataIntoTiles2(channelData, clockwiseOrder=False):
     orderedTiles = np.moveaxis(tiles.reshape(16, 128, 32, channelData.shape[2]), 2, 1)
     if clockwiseOrder:
         # Naturally, the tile data after splitting is in reading
-        # order (i.e. top left tile is first, top right tile is second, 
-        # etc.). The official LPD tile order however is clockwise, 
+        # order (i.e. top left tile is first, top right tile is second,
+        # etc.). The official LPD tile order however is clockwise,
         # starting with the top right tile. The following array
         # contains indices of tiles in reading order as they would
         # be iterated in clockwise order (starting from the top right)
@@ -294,7 +294,7 @@ def returnPositioned2(geometry_file, modules, dquads):
     tile_order = [1, 2, 3, 4]
     cells = 0
     for sm, mn in modules:
-        
+
         position = np.asarray([getModulePosition(geometry_file,
                                                  'Q{}/M{:d}/T{:02d}'.format(
                                                      sm//4+1,
@@ -355,7 +355,7 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
     all_intersected = None
     for file in files:
         ch = int(re.findall(r'.*-{}([0-9]+)-.*'.format(detector), file)[0])
-        
+
         try:
             with h5py.File(file, 'r') as f:
                 if trainIds is None:
@@ -369,18 +369,18 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
                     counts = np.squeeze(f[cpath])
                     nzeros = counts != 0
                     tid = tid[nzeros]
-                    
+
                     intersection = np.intersect1d(tid, trainIds, assume_unique=True)
-                    
+
                     if intersection.size == 0:
                         continue
-                        
+
                     if all_intersected is None:
                         all_intersected = intersection
                     else:
                         all_intersected = np.intersect1d(all_intersected, intersection, assume_unique=True)
                     continue
-                    
+
                 if ch not in data:
                     data[ch] = np.moveaxis(np.moveaxis(d, 0, 2), 1, 0)
                 else:
@@ -388,7 +388,7 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
         except Exception as e:
             print(file)
             print(e)
-            
+
     pcounts = None
     if trainIds is not None:
         for file in files:
@@ -396,7 +396,7 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
 
             try:
                 with h5py.File(file, 'r') as f:
-                    
+
 
                     tid = np.squeeze(f["/INDEX/trainId"])
                     spath = datapath.replace("INSTRUMENT", "INDEX").format(ch).split("/")[:-1]
@@ -408,26 +408,26 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
                     tid = tid[nzeros]
 
                     tid_to_use = np.in1d(tid, all_intersected)
-                    
-                    indices = []                                        
+
+                    indices = []
                     firsts = f[fpath][nzeros][tid_to_use]
                     counts = f[cpath][nzeros][tid_to_use]
-                    
+
                     if pcounts is None:
                         pcounts = counts
                     df = firsts[1]-firsts[0]
-                    
-                    for i in range(firsts.shape[0]):                        
+
+                    for i in range(firsts.shape[0]):
                         count = counts[i] if max_counts is None else max_counts
                         first = firsts[i]//df*count if not nwa else firsts[i]
-                        
+
                         indices += list(np.arange(first, first+count))
-                    
+
                     if len(indices) == 0:
-                        continue                        
+                        continue
                     indices = np.unique(np.sort(np.array(indices).astype(np.int)))
                     indices = indices[indices < f[datapath.format(ch)].shape[0]]
-                    
+
                     #if all contingous just use the range
                     #if np.allclose(indices[1:]-indices[:-1], 1):
                     d = np.squeeze(f[datapath.format(ch)][indices,:,:])
@@ -438,11 +438,11 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
                     else:
                         data[ch] = np.concatenate(data[ch], np.moveaxis(np.moveaxis(d, 0, 2), 1, 0), axis=2)
             except Exception as e:
-                print(e)                       
-            
+                print(e)
+
     full_data = []
     dummy = next(iter(data.values()))
-    
+
     for i in range(16):
         if i in data:
             full_data.append((i, data[i]))
@@ -453,7 +453,7 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all',
         return np.moveaxis(pos, 2, 0)
     else:
         return np.moveaxis(pos, 2, 0), all_intersected, pcounts
-    
+
 def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False, max_counts=None):
     import glob
     detector = "LPD" if "LPD" in datapath else "AGIPD"
@@ -462,7 +462,7 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
     all_intersected = None
     for file in files:
         ch = int(re.findall(r'.*-{}([0-9]+)-.*'.format(detector), file)[0])
-        
+
         try:
             with h5py.File(file, 'r') as f:
                 if trainIds is None:
@@ -476,18 +476,18 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
                     counts = np.squeeze(f[cpath])
                     nzeros = counts != 0
                     tid = tid[nzeros]
-                    
+
                     intersection = np.intersect1d(tid, trainIds, assume_unique=True)
-                    
+
                     if intersection.size == 0:
                         continue
-                        
+
                     if all_intersected is None:
                         all_intersected = intersection
                     else:
                         all_intersected = np.intersect1d(all_intersected, intersection, assume_unique=True)
                     continue
-                    
+
                 if ch not in data:
                     data[ch] = np.moveaxis(np.moveaxis(d, 0, 2), 1, 0)
                 else:
@@ -495,7 +495,7 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
         except Exception as e:
             print(file)
             print(e)
-            
+
     pcounts = None
     if trainIds is not None:
         for file in files:
@@ -503,7 +503,7 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
 
             try:
                 with h5py.File(file, 'r') as f:
-                    
+
 
                     tid = np.squeeze(f["/INDEX/trainId"])
                     spath = datapath.replace("INSTRUMENT", "INDEX").format(ch).split("/")[:-1]
@@ -515,26 +515,26 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
                     tid = tid[nzeros]
 
                     tid_to_use = np.in1d(tid, all_intersected)
-                    
-                    indices = []                                        
+
+                    indices = []
                     firsts = f[fpath][nzeros][tid_to_use]
                     counts = f[cpath][nzeros][tid_to_use]
-                    
+
                     if pcounts is None:
                         pcounts = counts
                     df = firsts[1]-firsts[0]
-                    
-                    for i in range(firsts.shape[0]):                        
+
+                    for i in range(firsts.shape[0]):
                         count = counts[i] if max_counts is None else max_counts
                         first = firsts[i]//df*count if not nwa else firsts[i]
-                        
+
                         indices += list(np.arange(first, first+count))
-                    
+
                     if len(indices) == 0:
-                        continue                        
+                        continue
                     indices = np.unique(np.sort(np.array(indices).astype(np.int)))
                     indices = indices[indices < f[datapath.format(ch)].shape[0]]
-                    
+
                     #if all contingous just use the range
                     #if np.allclose(indices[1:]-indices[:-1], 1):
                     d = np.squeeze(f[datapath.format(ch)][indices,:,:])
@@ -545,11 +545,11 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
                     else:
                         data[ch] = np.concatenate(data[ch], np.moveaxis(np.moveaxis(d, 0, 2), 1, 0), axis=2)
             except Exception as e:
-                print(e)                       
-            
+                print(e)
+
     full_data = []
     dummy = next(iter(data.values()))
-    
+
     for i in range(16):
         if i in data:
             full_data.append((i, data[i]))
@@ -559,4 +559,4 @@ def matchedFileList(filelist, datapath, nImages='all', trainIds=None, nwa=False,
     if trainIds is None:
         return pos
     else:
-        return pos, all_intersected, pcounts
\ No newline at end of file
+        return pos, all_intersected, pcounts
diff --git a/cal_tools/cal_tools/plotting.py b/cal_tools/cal_tools/plotting.py
index 7eff8f3ed4be174a5c384ba791ffb6a33a14c7e6..d85cbd1716a887edcd37f7058915f25538d87f70 100644
--- a/cal_tools/cal_tools/plotting.py
+++ b/cal_tools/cal_tools/plotting.py
@@ -47,7 +47,7 @@ def show_overview(d, cell_to_preview, gain_to_preview, out_folder=None, infix=No
             else:
                 med = np.nanmedian(item[..., cell_to_preview])
             medscale = med
-            if med == 0:           
+            if med == 0:
                 medscale = 0.1
 
             bound = 0.2
diff --git a/cal_tools/cal_tools/pnccdlib.py b/cal_tools/cal_tools/pnccdlib.py
index 3d913a72a49dd6e436f924a648d8033415051784..ec8b92e7f65dfb21a09fa099df3a749d55833fc7 100644
--- a/cal_tools/cal_tools/pnccdlib.py
+++ b/cal_tools/cal_tools/pnccdlib.py
@@ -33,10 +33,10 @@ def extract_slow_data(karabo_id: str, karabo_da_control: str,
                 bias_voltage = abs(f[os.path.join(mdl_ctrl_path,
                                                   "DAQ_MPOD/u0voltage/value")][0])  # noqa
             if gain == 0.1:
-                gain = f[os.path.join(mdl_ctrl_path, 
+                gain = f[os.path.join(mdl_ctrl_path,
                                       "DAQ_GAIN/pNCCDGain/value")][0]
             if fix_temperature_top == 0.:
-                fix_temperature_top = f[os.path.join(ctrl_path, 
+                fix_temperature_top = f[os.path.join(ctrl_path,
                                                      "inputA/krdg/value")][0]
             if fix_temperature_bot == 0.:
                 fix_temperature_bot = f[os.path.join(ctrl_path,
@@ -53,5 +53,5 @@ def extract_slow_data(karabo_id: str, karabo_da_control: str,
               os.path.join(ctrl_path, "inputA/krdg/value"))
         print("fix_temperature_bot control h5path:",
               os.path.join(ctrl_path, "inputB/krdg/value"))
-    
-    return bias_voltage, gain, fix_temperature_top, fix_temperature_bot
\ No newline at end of file
+
+    return bias_voltage, gain, fix_temperature_top, fix_temperature_bot
diff --git a/cal_tools/cal_tools/tools.py b/cal_tools/cal_tools/tools.py
index 1b5707a3767f7934e92b218d591a214f70203a45..d4015a558d56e6ea3a2daa5d24ba9fb104378b70 100644
--- a/cal_tools/cal_tools/tools.py
+++ b/cal_tools/cal_tools/tools.py
@@ -275,7 +275,8 @@ def get_dir_creation_date(directory: Union[str, Path], run: int,
             rfile = sorted(rfiles, key=path.getmtime)[0]
             with h5py.File(rfile, 'r') as fin:
                 cdate = fin['METADATA/creationDate'][0].decode()
-                cdate = datetime.datetime.strptime(cdate,
+                cdate = datetime.datetime.strptime(
+                    cdate,
                     "%Y%m%dT%H%M%SZ").replace(tzinfo=datetime.timezone.utc)
             return cdate
         except (IndexError, IOError, ValueError):
diff --git a/cal_tools/cython/agipdalgs.pyx b/cal_tools/cython/agipdalgs.pyx
index e5457aef11a29a81ced4538e00365cf99096c88f..cacc0f549bbce04b4d25b74874488b0ce3cf6f3e 100644
--- a/cal_tools/cython/agipdalgs.pyx
+++ b/cal_tools/cython/agipdalgs.pyx
@@ -15,10 +15,10 @@ def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20,
     """
 
     cdef cnp.ndarray[cnp.float32_t, ndim=2] ret
-    cdef double min, max 
-    min = range[0] 
-    max = range[1] 
-         
+    cdef double min, max
+    min = range[0]
+    max = range[1]
+
     ret = np.zeros((bins,data.shape[1]), dtype=np.float32)
     cdef double bin_width = (max - min) / bins
     cdef double x
@@ -31,9 +31,9 @@ def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20,
         for i in xrange(data.shape[0]):
             x = (data[i,j] - min) / bin_width
             if 0.0 <= x < bins:
-                if weights is None: 
+                if weights is None:
                     ret[<int>x,j] += 1.0
-                else: 
+                else:
                     ret[<int>x,j] += weights[i,j]
     return ret, np.linspace(min, max, bins+1)
 
@@ -83,16 +83,16 @@ def gain_choose(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.float32_t, n
     cdef cnp.uint8_t v
     cdef cnp.ndarray[cnp.float32_t, ndim=3] out
     out = np.zeros_like(a, dtype=np.float32)
-    
+
     assert (<object>choices).shape == (3,) + (<object>a).shape
-    
+
     with nogil:
         for i in range(a.shape[0]):
             for j in range(a.shape[1]):
                 for k in range(a.shape[2]):
                     v = a[i, j, k]
                     out[i, j, k] = choices[v, i, j, k]
-    
+
     return out
 
 
@@ -104,16 +104,16 @@ def gain_choose_int(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.int32_t,
     cdef cnp.uint8_t v
     cdef cnp.ndarray[cnp.int32_t, ndim=3] out
     out = np.zeros_like(a, dtype=np.int32)
-    
+
     assert (<object>choices).shape == (3,) + (<object>a).shape
-    
+
     with nogil:
         for i in range(a.shape[0]):
             for j in range(a.shape[1]):
                 for k in range(a.shape[2]):
                     v = a[i, j, k]
                     out[i, j, k] = choices[v, i, j, k]
-    
+
     return out
 
 
@@ -130,12 +130,12 @@ def sum_and_count_in_range_asic(cnp.ndarray[float, ndim=4] arr, float lower, flo
     cdef float value
     cdef cnp.ndarray[unsigned long long, ndim=2] count
     cdef cnp.ndarray[double, ndim=2] sum_
-    
+
     # Drop axes -2 & -1 (pixel dimensions within each ASIC)
     out_shape = arr[:, :, 0, 0].shape
     count = np.zeros(out_shape, dtype=np.uint64)
     sum_ = np.zeros(out_shape, dtype=np.float64)
-    
+
     with nogil:
         for i in range(arr.shape[0]):
             for k in range(arr.shape[1]):
@@ -161,13 +161,13 @@ def sum_and_count_in_range_cell(cnp.ndarray[float, ndim=4] arr, float lower, flo
     cdef float value
     cdef cnp.ndarray[unsigned long long, ndim=2] count
     cdef cnp.ndarray[double, ndim=2] sum_
-    
+
     # Drop axes 0 & 1
     out_shape = arr[0, 0, :, :].shape
     count = np.zeros(out_shape, dtype=np.uint64)
     sum_ = np.zeros(out_shape, dtype=np.float64)
-    
-    
+
+
     with nogil:
         for i in range(arr.shape[0]):
             for k in range(arr.shape[1]):
diff --git a/docs/source/advanced.rst b/docs/source/advanced.rst
index 9f623ab8cc46ef589babef4d044c72341c43dc4c..07bfb1f9e846a36ee2271f4528a2b457381a0969 100644
--- a/docs/source/advanced.rst
+++ b/docs/source/advanced.rst
@@ -42,35 +42,35 @@ This can be useful to add user requests while running. For this:
 
 1. create a working copy of the notebook in question, and create a commit of the
    production notebook to fall back to in case of problems::
-   
+
    git add production_notebook_NBC.py
    git commit -m "Known working version before edits"
    cp production_notebook_NBC.py production_notebook_TEST.py
-   
+
 2. add any features there and *thoroughly* test them
 3. when you are happy with the results, copy them over into the production notebook and
    save.
- 
+
 .. warning::
 
     Live editing of correction notebooks is entirely your responsibility. Do not do it
     if you are not 100% sure you know what you are doing.
-    
-4. If it fails, revert back to the original state, ideally via git:: 
 
-       git checkout HEAD -- production_notebook_NBC.py 
+4. If it fails, revert back to the original state, ideally via git::
+
+       git checkout HEAD -- production_notebook_NBC.py
 
 5. Any runs which did not correct due to failures of the live edit can then be relaunched
    manually, assuming the correction notebook allows run and overwrite parameters::
-   
+
        xfel-calibrate ...... --run XYZ,ZXY-YYS --overwrite
-  
-  
+
+
 Using a Parameter Generator Function
 ------------------------------------
 
 By default, the parameters to be exposed to the command line are deduced from the
-first code cell of the notebook, after resolving the notebook itself from the 
+first code cell of the notebook, after resolving the notebook itself from the
 detector and characterization type. For some applications it might be beneficial
 to define a context-specific parameter range within the same notebook, based on
 additional user input. This can be done via a parameter generation function which
@@ -82,7 +82,7 @@ is defined in one of the code cell::
         existing = set()
         def extract_parms(cls):
             args, varargs, varkw, defaults = inspect.getargspec(cls.__init__)
-            pList = []        
+            pList = []
             for i, arg in enumerate(args[1:][::-1]):
                 if arg in existing:
                     continue
@@ -90,7 +90,7 @@ is defined in one of the code cell::
                 existing.add(arg)
 
                 if i < len(defaults):
-                    default = defaults[::-1][i]                
+                    default = defaults[::-1][i]
                     if str(default).isdigit():
                         pList.append("{} = {}".format(arg, default))
                     elif default is None or default == "None":
@@ -108,21 +108,21 @@ is defined in one of the code cell::
                 parms = extract_parms(getattr(condition, dtype))
                 [all_conditions.add(p) for p in parms]
         return "\n".join(all_conditions)
-        
+
 
 .. note::
 
    Note how all imports are inlined, as the function is executed outside the
    notebook context.
-       
+
 In the example, the function generates a list of additional parameters depending
 on the `detector_instance` given. Here, `detector_instance` is defined in the first
-code cell the usual way. Any other parameters defined such, that have names matching 
+code cell in the usual way. Any other parameters defined this way that have names matching
 those of the generator function signature are passed to this function. The function
 should then return a string containing additional code to be appended to the first
 code cell.
 
-To make use of this functionality, the parameter generator function needs to be 
+To make use of this functionality, the parameter generator function needs to be
 configured in `notebooks.py`, e.g. ::
 
     ...
@@ -136,7 +136,7 @@ configured in `notebooks.py`, e.g. ::
         },
     }
     ...
-       
+
 To generically query which parameters are defined in the first code cell, the
 code execution history feature of IPython can be used::
 
@@ -156,6 +156,6 @@ code execution history feature of iPython can be used::
             parms[n] = str(v) if not isinstance(v, str) else v
         if parms[n] == "None" or parms[n] == "'None'":
             parms[n] = None
-                      
+
 This will create a dictionary `parms` which contains all parameters either
-as `float` or `str` values.
\ No newline at end of file
+as `float` or `str` values.
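
As a minimal, illustrative sketch of the parameter generator mechanism described
above (the name `extend_parms` and the generated parameters are hypothetical, not
taken from the repository), such a function simply returns extra code to append to
the notebook's first code cell::

    def extend_parms(detector_instance):
        # Imports would be inlined here, as the function is executed outside
        # the notebook context (see the note above).
        extra = [
            "# parameters generated for {}".format(detector_instance),
            "extra_threshold = 0.0  # hypothetical parameter, for illustration",
        ]
        # The returned string is appended to the notebook's first code cell.
        return "\n".join(extra)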
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 23c008eba4f6ed9c3cf63b4d03b4cb01668b2c73..693a632c479a8138ec0597535dd386458686d595 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -385,7 +385,7 @@ from nbconvert import RSTExporter
 from xfel_calibrate import notebooks
 
 rst_exporter = RSTExporter()
-    
+
 with open("available_notebooks.rst", "w") as f:
     f.write(dedent("""
             .. _available_notebooks:
@@ -395,8 +395,8 @@ with open("available_notebooks.rst", "w") as f:
 
             The following notebooks are currently integrated into the European XFEL
             Offline Calibration tool chain.
-            
-            
+
+
             """))
 
     for detector in sorted(notebooks.notebooks.keys()):
@@ -404,10 +404,10 @@ with open("available_notebooks.rst", "w") as f:
         f.write("{}\n".format(detector))
         f.write("{}\n".format("-"*len(detector)))
         f.write("\n")
-        
+
         for caltype in sorted(values.keys()):
             data = values[caltype]
-            
+
             nbpath = os.path.abspath("{}/../../../{}".format(__file__, data["notebook"]))
             with open(nbpath, "r") as nf:
                 nb = nbformat.read(nf, as_version=4)
@@ -419,16 +419,16 @@ with open("available_notebooks.rst", "w") as f:
                 nb.cells = [mdcell]  # we only want this single cell
                 body, _ = rst_exporter.from_notebook_node(nb)
                 adjusted = []
-                # adjust titles 
+                # adjust titles
                 for line in body.split("\n"):
                     if line.startswith("=="):
                         line = line.replace("=", "+")
                     if line.startswith("--"):
                         line = line.replace("-", "~")
                     adjusted.append(line)
-                f.write("\n".join(adjusted))                
+                f.write("\n".join(adjusted))
                 f.write("\n")
-            
+
             f.write("To invoke this notebook and display help use:\n\n")
             f.write(".. code-block:: bash\n\n")
             f.write("    xfel-calibrate {} {} --help\n\n".format(detector, caltype))
@@ -461,18 +461,18 @@ def xml_to_rst_report(xml, git_tag, reports=[]):
     rst[-1] = rst[-1].format(test_name=test_name, ex_date=ex_date)
     rst += ["="*len(rst[-1])]
     rst += [""]
-    
+
     num_tests = e.get("tests")
     num_err = int(e.get("errors"))
     num_fail = int(e.get("failures"))
     num_skip = int(e.get("skipped"))
-    
+
     # create a summary header
     if num_err + num_fail == 0:
         rst += [":header-passed:`✓`"]
     else:
         rst += [":header-failed:`❌`"]
-        
+
     if num_skip > 0:
         rst[-1] += ":header-skipped:`⚠`"
     rst += [""]
@@ -487,12 +487,12 @@ def xml_to_rst_report(xml, git_tag, reports=[]):
     for rname, rpath in reports:
         rst += [":Report: `{} <{}>`_".format(rname, rpath)]
     rst += [""]
-    
+
     # now the details
     rst += ["Detailed Results"]
     rst += ["-"*len(rst[-1])]
     rst += [""]
-    
+
     detailed_failures = []
     rows = []
     for child in e:
@@ -515,12 +515,12 @@ def xml_to_rst_report(xml, git_tag, reports=[]):
         msg = "\n".join(textwrap.wrap(msg, 20))
         row = [status, name, etype, msg, extime ]
         rows.append(row)
-    
+
     header = ["Result", "Test", "Error", "Message", "Duration (s)"]
     tblrst =  tabulate.tabulate(rows, headers=header, tablefmt="rst")
     rst += tblrst.split("\n")
     rst += [""]
-    
+
     for test, report in detailed_failures:
         rst += ["Failure report for: {}".format(test)]
         rst += ["~"*len(rst[-1])]
@@ -528,15 +528,15 @@ def xml_to_rst_report(xml, git_tag, reports=[]):
         rst += [".. code-block:: python"]
         rst += textwrap.indent(report, " "*4).split("\n")
         rst += [""]
-    
+
     do_console = False
     for child in e:
         if child.tag == "system-out" and len(child.text.strip()):
             do_console = True
             break
-    
+
     if do_console:
-    
+
         # console output
         rst += ["Console Output"]
         rst += ["-"*len(rst[-1])]
@@ -549,7 +549,7 @@ def xml_to_rst_report(xml, git_tag, reports=[]):
             rst += [".. code-block:: console"]
             rst += textwrap.indent(child.text, " "*4).split("\n")
 
-        
+
     return "\n".join(rst)
 
 def sorted_dir(folder):
@@ -570,12 +570,12 @@ Contents:
 
 .. toctree::
    :maxdepth: 2
-   
+
 
 """
 if not os.path.exists("./test_rsts"):
     os.makedirs("./test_rsts")
-    
+
 with open("test_results.rst", "w") as f:
     f.write(header)
     for commit, modtime in sorted_dir(test_artefact_dir):
@@ -586,7 +586,7 @@ with open("test_results.rst", "w") as f:
             rst += ["+"*len(rst[-1])]
             rst += [""]
             fr.write("\n".join(rst))
-            
+
             # copy reports
             pdfs = glob.glob("{}/{}/*/*.pdf".format(test_artefact_dir, commit))
             if not os.path.exists("./_static/reports/{}".format(commit)):
@@ -600,7 +600,7 @@ with open("test_results.rst", "w") as f:
                 rname = os.path.basename(pdf).split(".")[0]
                 rlist.append((rname, "../_static/reports/{}".format(ppath)))
                 reports[rloc] = rlist
-            
+
             xmls = glob.glob("{}/{}/*/TEST*.xml".format(test_artefact_dir, commit))
             for xml in xmls:
                 rloc = xml.split("/")[-2]
diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst
index 8db9435689f61adef714f01daa23725a1204be7e..241070cbd51c9e0eab655e9f8920c5878c1db17c 100644
--- a/docs/source/configuration.rst
+++ b/docs/source/configuration.rst
@@ -28,7 +28,7 @@ python file of the form::
 
     # the command to run this concurrently. It is prepended to the actual call
     launcher_command = "sbatch -p exfel -t 24:00:00 --mem 500G --mail-type END --requeue --output {temp_path}/slurm-%j.out"
-    
+
 A comment is given for the meaning of each configuration parameter.
 
 
@@ -62,11 +62,11 @@ The configuration is to be given in form of a python directory::
              ...
          }
      }
-     
+
 The first key is the detector that the calibration may be used for, here AGIPD. The second
 key level gives the name of the task being performed (here: DARK and PC). For each of these
 entries, a path to the notebook and a concurrency hint should be given. In the concurrency
-hint the first entry specifies which parameter of the notebook expects a list whose integer 
+hint the first entry specifies which parameter of the notebook expects a list whose integer
 entries can be concurrently run (here "modules"). The second parameter states with which range
 to fill this parameter if it is not given by the user. In the example a `range(16):=0,1,2,...15`
 would be passed onto the notebook, which is run as 16 concurrent jobs, each processing one module.
@@ -75,9 +75,9 @@ be derived e.g. by profiling memory usage per core, run times, etc.
 
 .. note::
 
-    It is good practice to name command line enabled notebooks with an `_NBC` suffix as shown in 
+    It is good practice to name command line enabled notebooks with an `_NBC` suffix as shown in
     the above example.
-    
+
 The `CORRECT` notebook (last notebook in the example) makes use of a concurrency generating function
 by setting the `use function` parameter. This function must be defined in a code cell in the notebook;
 its parameters should be named like other exposed parameters. It should return a list of parameters
@@ -99,13 +99,13 @@ function::
                                                        len(seq_nums)//sequences_per_node+1)]
         else:
             return sequences
-         
-         
+
+
 .. note::
 
     Note how imports are inlined in the definition. This is necessary, as only the function code,
     not the entire notebook, is executed.
-            
+
 which requires as exposed parameters e.g. ::
 
     in_folder = "/gpfs/exfel/exp/SPB/201701/p002038/raw/" # the folder to read data from, required
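
The tail of the concurrency function shown in the hunk above is hard to read out of
context; a self-contained sketch of such a `use function` (the name
`balance_sequences` and the chunking are illustrative; the function shown above
additionally derives the sequence numbers from the files found in `in_folder`)
might look like::

    def balance_sequences(in_folder, run, sequences, sequences_per_node):
        # Imports would be inlined here, as only this function's code is
        # executed, not the whole notebook (see the note above).
        # in_folder and run would be used to list the sequence files on disk.
        if sequences == [-1]:
            sequences = list(range(10))  # hypothetical stand-in for the
                                         # sequence numbers found on disk
        # Split into chunks; each chunk is handled by one concurrently run notebook.
        return [sequences[i:i + sequences_per_node]
                for i in range(0, len(sequences), sequences_per_node)]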
diff --git a/docs/source/how_it_works.rst b/docs/source/how_it_works.rst
index d8246c1883afd9fa8fef9bddeb911b913ab8b156..090ebae9fc97591f86af5b71cac651b555faa63d 100644
--- a/docs/source/how_it_works.rst
+++ b/docs/source/how_it_works.rst
@@ -8,14 +8,14 @@ to expose Jupyter_ notebooks to a command line interface. In the process reports
 from these notebooks. The general interface is::
 
     % xfel-calibrate DETECTOR TYPE
-    
+
 where `DETECTOR` and `TYPE` specify the task to be performed.
 
 Additionally, it leverages the DESY/XFEL Maxwell cluster to run these jobs in parallel
 via SLURM_.
 
 Here is a list of :ref:`available_notebooks`. See the :ref:`advanced_topics` if you are
-for details on how to use as detector group staff. 
+detector group staff and need details on advanced usage.
 
 If you would like to integrate additional notebooks please see the :ref:`development_workflow`.
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 06ef255aca0dc5fd2826611c375dc6d8191fe82f..5512e8a006aec326d32864ed11b2321248d428f3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,7 +10,7 @@ Contents:
 
 .. toctree::
    :maxdepth: 2
-   
+
    how_it_works
    installation
    configuration
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 38ef40719d5b0e048cca3ebb4edc9e5ee061901c..58f3f72268b460d4adaf297c0e6ede83ba2a0a1d 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -35,9 +35,9 @@ Installation using Anaconda
 
 First you need to load the anaconda/3 environment through::
 
-    1. module load anaconda/3 
+    1. module load anaconda/3
 
-If installing into other python enviroments, this step can be skipped. 
+If installing into other Python environments, this step can be skipped.
 
 Then the package for the offline calibration can be obtained from the git repository::
 
@@ -75,14 +75,14 @@ folder to match your environment.
 The tool-chain is then available via the::
 
     xfel-calibrate
-    
+
 command.
 
 
 Installation using karabo
 +++++++++++++++++++++++++
 
-If required, one can install into karabo environment. The difference would be to 
+If required, one can install into the karabo environment. The difference would be to
 first source activate the karabo environment::
 
     1. source karabo/activate
@@ -94,8 +94,8 @@ then after cloning the offline calibration package from git, the requirements ca
 Development Installation
 ------------------------
 
-For a development installation in your home directory, which automatically 
-picks up (most) changes, first install the dependencies as above, 
+For a development installation in your home directory, which automatically
+picks up (most) changes, first install the dependencies as above,
 but then install the tool-chain separately in development mode::
 
    pip install -e . --user
@@ -107,14 +107,12 @@ but then install the tool-chain separately in development mode::
 Installation of New Notebooks
 -----------------------------
 
-To install new, previously untracked notebooks in the home directory, 
-repeat the installation of the the tool-chain, without requirments, 
+To install new, previously untracked notebooks in the home directory,
+repeat the installation of the tool-chain, without requirements,
 from the package base directory::
 
     pip install --upgrade . --user
-    
+
 Or, in case you are actively developing::
 
     pip install -e . --user
-
-
diff --git a/docs/source/makeAllDocs.sh b/docs/source/makeAllDocs.sh
index 402e2e603b579827c4eae5846a9f1b2e395382fd..a54263b5de157ad92301f687860a6512486c182f 100755
--- a/docs/source/makeAllDocs.sh
+++ b/docs/source/makeAllDocs.sh
@@ -35,4 +35,3 @@ done
 rm *.bak
 
 #cd .. rm api/* sphinx-apidoc -o ./api/ -E ../../iCalibrationDB/
-
diff --git a/docs/source/test_rsts/roles.rst b/docs/source/test_rsts/roles.rst
index 525c6f0af4aa5a93870f59ef90957cb432598a3c..d153c86059f561e86c47fcdd864a2972373b38f7 100644
--- a/docs/source/test_rsts/roles.rst
+++ b/docs/source/test_rsts/roles.rst
@@ -3,4 +3,4 @@
 .. role:: header-failed
 .. role:: passed
 .. role:: skipped
-.. role:: failed
\ No newline at end of file
+.. role:: failed
diff --git a/docs/source/testing.rst b/docs/source/testing.rst
index 214248bf4b7da4a83b160e3e153f5d79221f0ccb..4f9ad478f3a3ec19e7da29e789bd90059f69ec53 100644
--- a/docs/source/testing.rst
+++ b/docs/source/testing.rst
@@ -23,7 +23,7 @@ run can be assigned to that commit::
 To run all tests, navigate to the test directory and execute::
 
     python -m unittest discover
-    
+
 This will usually entail executing a notebook under test via SLURM
 first, then checking its output against the last committed artefacts
 of that test type.
@@ -32,7 +32,7 @@ If individual tests are run, e.g. for debugging, additional options
 exist to skip tests, or notebook execution::
 
    python test_XXX.py --help
-   
+
 where `test_XXX.py` is the test name, will give you a list of options
 available for that test.
 
@@ -44,7 +44,7 @@ generate new artefacts.
 
     Running tests will generate entries for test reports in the
     artefacts directory under the most recent commit.
-    Reviewers should check that such updates are present in the 
+    Reviewers should check that such updates are present in the
     list of changed files.
 
 
@@ -64,7 +64,7 @@ Contrary to running tests alone, new artefacts need to be generated
 for each affected test individually::
 
     python test_XXX.py --generate
-    
+
 replacing `test_XXX.py` with the test you'd like to run. This
 will execute the notebook, create artefact entries in the artefact
 dir, and after that will check for consistency by executing the test against
@@ -76,15 +76,15 @@ commit the new artefacts and create a merge request for your branch::
 
     git add tests/artefacts/
     git commit -m "Added new artefacts for changes related to baseline shifts"
-    
+
 Please also add comments in the MR description on why artefacts have
 changed.
 
 .. note::
 
-    Reviewers should always evaluate if the changes in test artefacts are 
+    Reviewers should always evaluate if the changes in test artefacts are
     appropriate, intended and acceptable.
-    
+
 Test Reports
 ++++++++++++
 
@@ -114,4 +114,3 @@ Repositories of calibration constants used in testing can be found at::
     /gpfs/exfel/exp/XMPL/201750/p700001/usr
 
 .. include:: test_results.rst
-
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 89607fdb77bb3637f7c42f0aee746d8470a6a5ed..609d97bf6119c8ef034eede79a943f7a2a88202e 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -17,7 +17,7 @@ The Tutorial consist of this documentation and two very simple notebooks:
    calibration tool-chain.
 
 To have a look at those notebooks start from a shell with the karabo environment::
-  
+
   jupyter-notebook
 
 This will open a jupyter kernel running in your browser, where you can then open the notebooks in the folder notebooks/Tutorial. If, in addition, you also start the ipcluster in another shell, as instructed in the calversion.ipynb notebook::
@@ -50,14 +50,14 @@ to install the necessary packages and setup the environment:
 
      ./karabo-2.2.4-Release-CentOS-7-x86_64.sh
 
-     source karabo/activate 
+     source karabo/activate
 
 3. Get the package pycalibration which contains the offline calibration tool-chain::
-  
+
      git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git
 
 4. Install the necessary requirements and the package itself::
- 
+
      cd pycalibration
      pip install -r requirements.txt .
 
diff --git a/docs/source/workflow.rst b/docs/source/workflow.rst
index 8b7d4744a77a5db2b897c51ddfcae3b0a2a42a3f..0b9f0c6ff2d8870b18b0f85959c62d548f1c0ca5 100644
--- a/docs/source/workflow.rst
+++ b/docs/source/workflow.rst
@@ -9,31 +9,31 @@ when developing new offline calibration tools.
 Fresh Start
 -----------
 
-If you are starting a blank notebook from scratch you should first 
+If you are starting a blank notebook from scratch, you should first
 think about a few preliminary considerations:
 
-* Will the notebook performan a headless task, or will it also be 
-  an important interface for evaluating the results in form of a 
+* Will the notebook perform a headless task, or will it also be
+  an important interface for evaluating the results in the form of a
   report?
 * Do you need to run concurrently? Is concurrency handled internally,
-  e.g. by use of ipcluster, or also on a host level, using cluster 
+  e.g. by use of ipcluster, or also on a host level, using cluster
   computing via slurm?
 
 In case you plan on using the notebook as a report tool, you should make
 sure to provide sufficient guidance and textual details using e.g. markdown
-cells in the notebook. You should also structure it into appropriate 
+cells in the notebook. You should also structure it into appropriate
 subsections.
 
 If you plan on running concurrently on the cluster, identify which variable
-should be mapped to concurent runs. For autofilling it an integer list is 
+should be mapped to concurrent runs. For autofilling it, an integer list is
 needed.
 
 Once you've clarified the above points, you should create a new notebook,
-either in an existing detector folder, or if for a yet not integrated 
+either in an existing detector folder or, for a not yet integrated
 detector, into a new folder with the detector's name. Give it the suffix
 `_NBC` to denote that it is enabled for the tool chain.
 
-You should then start writing your code following the guidelines 
+You should then start writing your code following the guidelines
 below.
 
 
@@ -41,10 +41,10 @@ From Existing Notebook
 ----------------------
 
 Copy your existing notebook into the appropriate detector directory,
-or create a new one if the detector does not exist yet. Give the copy 
-a suffix `_NBC` to denote that it is enabled for the tool chain. 
+or create a new one if the detector does not exist yet. Give the copy
+a suffix `_NBC` to denote that it is enabled for the tool chain.
 
-You should then start restructuring your code following the guidelines 
+You should then start restructuring your code following the guidelines
 below.
 
 Title and Author Information
@@ -55,11 +55,11 @@ author and version. These should be given in a leading markdown cell in
 the form::
 
     # My Fancy Calculation #
-    
+
     Author: Jane Doe, Version 0.1
-    
+
     A description of the notebook.
-    
+
 Information in this format will allow automatic parsing of author and version.
 
 
@@ -91,7 +91,7 @@ required::
     sequences = [0,1,2,3,4] # sequences files to use, range allowed
     cluster_profile = "noDB" # The ipcluster profile to use
     local_output = False # output constants locally
-    
+
 Here, `in_folder` and `out_folder` are required string values. Values for required parameters have to be given when executing from the command line. This means that any defaults given in the first cell of the code are ignored (they are only used to derive the type of the parameter). `Modules` is a list, which from the command line could also be assigned using a range expression, e.g. `5-10,12,13,18-21`, which would translate to `5,6,7,8,9,12,13,18,19,20`. It is also a required parameter. The parameter `local_output` is a Boolean. The corresponding argument given in the command line will change this parameter from `False` to `True`. There is no way to change this parameter from `True` to `False` from the command line.
 
 The `cluster_profile` parameter is a bit special, in that the tool kit expects exactly this
@@ -124,10 +124,10 @@ to the following parameters being exposed via the command line::
       --no-cluster-job      Do not run as a cluster job
       --report-to str       Filename (and optionally path) for output report
       --modules str [str ...]
-                            modules to work on, required, range allowed. 
+                            modules to work on, required, range allowed.
                             Default: [0]
       --sequences str [str ...]
-                            sequences files to use, range allowed. 
+                            sequences files to use, range allowed.
                             Default: [0, 1, 2, 3, 4]
       --cluster-profile str
                             The ipcluster profile to use. Default: noDB2
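
The range expressions marked "range allowed" in the argument descriptions above
expand as documented earlier, e.g. `5-10,12,13,18-21` becomes
`5,6,7,8,9,12,13,18,19,20`. A small sketch of that expansion (an illustration of
the documented semantics, not the tool's actual parser)::

    def expand_ranges(expr):
        """Expand '5-10,12,13,18-21' -> [5, 6, 7, 8, 9, 12, 13, 18, 19, 20]."""
        values = []
        for part in expr.split(","):
            if "-" in part:
                start, stop = map(int, part.split("-"))
                values.extend(range(start, stop))  # upper bound is exclusive
            else:
                values.append(int(part))
        return values

    assert expand_ranges("5-10,12,13,18-21") == [5, 6, 7, 8, 9, 12, 13, 18, 19, 20]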
@@ -135,7 +135,7 @@ to the following parameters being exposed via the command line::
       --local-output        output constants locally. Default: False
 
     ...
-    
+
 
 .. note::
 
@@ -184,9 +184,9 @@ wanting to run the tool will need to install these requirements as well. Thus,
 
 * keep runtimes and library requirements in mind. A library doing its own parallelism either
   needs to programmatically be able to set this up, or automatically do so. If you need to
-  start something from the command line first, things might be tricky as you will likely 
+  start something from the command line first, things might be tricky as you will likely
   need to run this via `Popen` commands with appropriate environment variables.
-  
+
 Writing out data
 ~~~~~~~~~~~~~~~~
 
@@ -197,7 +197,7 @@ possibly done later on in a notebook.
 Also consider using HDF5 via h5py_ as your output format. If you correct or calibrate
 input data, which adheres to the XFEL naming convention, you should maintain the convention
 in your output data. You should not touch any data that you do not actively work on and
-should assure that the `INDEX` and identifier entries are syncronized with respect to 
+should ensure that the `INDEX` and identifier entries are synchronized with respect to
 your output data. E.g. if you remove pulses from a train, the `INDEX/.../count` section
 should reflect this.
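
A hedged sketch of what keeping those counts synchronized means in practice (the
arrays here are invented for illustration; in real data the counts come from the
file's `INDEX` section)::

    import numpy as np

    count = np.array([64, 64, 64])      # hypothetical pulses per train
    keep = np.ones(count.sum(), bool)   # mask over all pulses kept in the output
    keep[10:20] = False                 # e.g. ten pulses dropped from train 0

    # Recompute the per-train counts so INDEX/.../count matches the output data.
    edges = np.cumsum(np.concatenate(([0], count)))
    new_count = np.array([keep[a:b].sum()
                          for a, b in zip(edges[:-1], edges[1:])])
    # new_count is now array([54, 64, 64])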
 
@@ -205,8 +205,8 @@ Finally, XFEL RAW data can contain filler data from the DAQ. One possible way of
 this data is the following::
 
     datapath = "/INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/cellId".format(channel)
-    
-    count = np.squeeze(infile[datapath])        
+
+    count = np.squeeze(infile[datapath])
     first = np.squeeze(infile[datapath])
     if np.count_nonzero(count != 0) == 0:  # filler data has counts of 0
         print("File {} has no valid counts".format(infile))
@@ -215,14 +215,14 @@ this data is the following::
     idxtrains = np.squeeze(infile["/INDEX/trainId"])
     medianTrain = np.nanmedian(idxtrains)  # protect against freak train ids
     valid &= (idxtrains > medianTrain - 1e4) & (idxtrains < medianTrain + 1e4)
-    
+
     # index ranges in which non-filler data exists
     last_index = int(first[valid][-1]+count[valid][-1])
     first_index = int(first[valid][0])
-    
+
     # access these indices
     cellIds = np.squeeze(np.array(infile[datapath][first_index:last_index, ...]))
-    
+
 
 Plotting
 ~~~~~~~~
@@ -243,10 +243,10 @@ Calibration Database Interaction
 --------------------------------
 
 Tasks which require calibration constants or produce such should do this by interacting with
-the European XFEL calibration database. 
+the European XFEL calibration database.
 
 In terms of development workflow it is usually easier to work with file-based I/O first and
-only switch over to the database after the algorithmic part of the notebook has matured. 
+only switch over to the database after the algorithmic part of the notebook has matured.
 Reasons for this include:
 
 * for developing against the database, new constants will have to be integrated therein first
@@ -263,7 +263,7 @@ Testing
 
 The most important test is that your notebook completes flawlessly outside any special
 tool chain feature. After all, the tool chain will only replace parameters, and then
-launch a concurrent job and generate a report out of notebook. If it fails to run in the 
+launch a concurrent job and generate a report out of the notebook. If it fails to run in the
 normal Jupyter notebook environment, it will certainly fail in the tool chain environment.
 
 Once you are satisfied with your current state of initial development, you can add it
@@ -273,7 +273,7 @@ Any changes you now make in the notebook will be automatically propagated to the
 Specifically, you should verify that all arguments are parsed correctly, e.g. by calling::
 
     xfel-calibrate DETECTOR NOTEBOOK_TYPE --help
-    
+
 From then on, check if parallel slurm jobs are executed correctly and if a report
 is generated at the end.
 
@@ -298,4 +298,4 @@ documentation.
 .. _matplotlib: https://matplotlib.org/
 .. _numpy: http://www.numpy.org/
 .. _h5py: https://www.h5py.org/
-.. _iCalibrationDB: https://in.xfel.eu/readthedocs/docs/icalibrationdb/en/latest/
\ No newline at end of file
+.. _iCalibrationDB: https://in.xfel.eu/readthedocs/docs/icalibrationdb/en/latest/
diff --git a/notebooks/AGIPD/playground/AGIPD_SingleM_test_Dark.ipynb b/notebooks/AGIPD/playground/AGIPD_SingleM_test_Dark.ipynb
index 8fc4b3c22d69a3ae99b7eaa00cb676b0565f1c82..533d0f36e367668b26e86611cc63d272c5d75d5e 100644
--- a/notebooks/AGIPD/playground/AGIPD_SingleM_test_Dark.ipynb
+++ b/notebooks/AGIPD/playground/AGIPD_SingleM_test_Dark.ipynb
@@ -860,4 +860,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/notebooks/AGIPD/playground/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb b/notebooks/AGIPD/playground/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
index 26671f7109241e39e9982ee26665064ca4765c2a..49c2e334f2632847528d2b53fe2991a56c69e5cc 100644
--- a/notebooks/AGIPD/playground/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
+++ b/notebooks/AGIPD/playground/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
@@ -1592,4 +1592,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 1
-}
\ No newline at end of file
+}
diff --git a/notebooks/AGIPD/playground/Chracterize_AGIPD_Gain_PC_mlearn.ipynb b/notebooks/AGIPD/playground/Chracterize_AGIPD_Gain_PC_mlearn.ipynb
index 4761a412a05f1a246b293fac8d19f000f07c3373..831dc4bd2145a533418e4e32a2bb358baba09dfd 100644
--- a/notebooks/AGIPD/playground/Chracterize_AGIPD_Gain_PC_mlearn.ipynb
+++ b/notebooks/AGIPD/playground/Chracterize_AGIPD_Gain_PC_mlearn.ipynb
@@ -607,4 +607,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb b/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb
index 7d4f3ecfd235c162fc9bfdd5f4ecc9b3c430c9b8..6ad80ff6010432c439814a1a91905b44fb842682 100644
--- a/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb
+++ b/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb
@@ -96,9 +96,10 @@
     "                                plot_badpix_3d, show_overview,\n",
     "                                show_processed_modules)\n",
     "from cal_tools.tools import (get_dir_creation_date, get_from_db, \n",
-    "                             get_notebook_name, get_random_db_interface,\n",
+    "                             get_pdu_from_db, get_notebook_name,\n",
+    "                             get_random_db_interface, get_report,\n",
     "                             map_gain_stages, parse_runs,\n",
-    "                             run_prop_seq_from_path, \n",
+    "                             run_prop_seq_from_path,\n",
     "                             save_const_to_h5, send_to_db)\n",
     "from cal_tools.dssclib import (get_dssc_ctrl_data,\n",
     "                               get_pulseid_checksum)\n",
@@ -159,7 +160,9 @@
     "print(f\"Input: {in_folder}\")\n",
     "print(f\"Output: {out_folder}\")\n",
     "print(f\"Bias voltage: {bias_voltage}V\")\n",
-    "file_loc = f'proposal:{prop} runs:{[ v for v in offset_runs.values()][0]}'"
+    "file_loc = f'proposal:{prop} runs:{[ v for v in offset_runs.values()][0]}'\n",
+    "\n",
+    "report = get_report(out_folder)"
    ]
   },
   {
@@ -367,6 +370,21 @@
     "    raise ValueError(\"0 processed memory cells. No raw data available.\")\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: add db_module when received from myMDC\n",
+    "# Create the modules dict of karabo_das and PDUs\n",
+    "qm_dict = OrderedDict()\n",
+    "for i, k_da in zip(modules, karabo_da):\n",
+    "    qm = f\"Q{i//4+1}M{i%4+1}\"\n",
+    "    qm_dict[qm] = {\"karabo_da\": k_da,\n",
+    "                   \"db_module\": \"\"}"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -379,10 +397,12 @@
     "old_mdata = {}\n",
     "\n",
     "print('Retrieve pre-existing constants for comparison.')\n",
-    "detinst = getattr(Detectors, dinstance)\n",
     "for qm in offset_g.keys():\n",
-    "    device = getattr(detinst, qm)\n",
+    "    qm_db = qm_dict[qm]\n",
+    "    karabo_da = qm_db[\"karabo_da\"]\n",
     "    for const in clist:\n",
+    "        \n",
+    "        dconst = getattr(Constants.DSSC, const)()\n",
     "        condition = Conditions.Dark.DSSC(memory_cells=max_cells,\n",
     "                                         bias_voltage=bias_voltage,\n",
     "                                         pulseid_checksum=checksums[qm],\n",
@@ -390,8 +410,16 @@
     "                                         target_gain=tGain[qm],\n",
     "                                         encoded_gain=encodedGain[qm])\n",
     "\n",
-    "        data, mdata = get_from_db(device,\n",
-    "                                  getattr(Constants.DSSC, const)(),\n",
+    "        # This fallback is used in case the notebook is run by\n",
+    "        # a method other than myMDC, which already sends the\n",
+    "        # CalCat info.\n",
+    "        # TODO: Set db_module to \"\" by default in the first cell\n",
+    "        if not qm_db[\"db_module\"]:\n",
+    "            qm_db[\"db_module\"] = get_pdu_from_db(karabo_id, karabo_da, dconst,\n",
+    "                                                 condition, cal_db_interface,\n",
+    "                                                 snapshot_at=creation_time)[0]\n",
+    "        data, mdata = get_from_db(karabo_id, karabo_da,\n",
+    "                                  dconst,\n",
     "                                  condition,\n",
     "                                  None,\n",
     "                                  cal_db_interface, creation_time=creation_time,\n",
@@ -403,9 +431,10 @@
     "            time = mdata.calibration_constant_version.begin_at\n",
     "            old_mdata[const] = time.isoformat()\n",
     "            os.makedirs(f'{out_folder}/old/', exist_ok=True)\n",
-    "            save_const_to_h5(device,\n",
+    "            save_const_to_h5(qm_db[\"db_module\"], karabo_id,\n",
     "                             getattr(Constants.DSSC, const)(),\n",
-    "                             condition, data, file_loc, creation_time,\n",
+    "                             condition, data, file_loc, report,\n",
+    "                             creation_time,\n",
     "                             f'{out_folder}/old/')\n",
     "        else:\n",
     "            old_mdata[const] = \"Not found\""
@@ -450,9 +479,8 @@
     "md = None\n",
     "for dont_use_pulseIds in [True, False]:\n",
     "    for qm in res.keys():\n",
-    "        detinst = getattr(Detectors, dinstance)\n",
-    "        device = getattr(detinst, qm)\n",
-    "\n",
+    "        karabo_da = qm_dict[qm][\"karabo_da\"]\n",
+    "        db_module = qm_dict[qm][\"db_module\"]\n",
     "        for const in res[qm].keys():\n",
     "            dconst = getattr(Constants.DSSC, const)()\n",
     "            dconst.data = res[qm][const]\n",
@@ -471,12 +499,12 @@
     "                                             encoded_gain=encodedgain)\n",
     "            \n",
     "            if db_output:\n",
-    "                md = send_to_db(device, dconst, condition, file_loc, \n",
+    "                md = send_to_db(db_module, karabo_id, dconst, condition, file_loc, report,\n",
     "                                cal_db_interface, creation_time=creation_time, timeout=cal_db_timeout)\n",
     "                \n",
     "            if local_output and dont_use_pulseIds: # Don't save constant localy two times.\n",
-    "                md = save_const_to_h5(device, dconst, condition,\n",
-    "                                      dconst.data, file_loc,\n",
+    "                md = save_const_to_h5(db_module, karabo_id, dconst, condition,\n",
+    "                                      dconst.data, file_loc, report,\n",
     "                                      creation_time, out_folder)\n",
     "                print(f\"Calibration constant {const} is stored locally.\\n\")\n",
     "    \n",
diff --git a/notebooks/FastCCD/Characterize_Darks_NewDAQ_FastCCD_NBC_New_Common_Mode.ipynb b/notebooks/FastCCD/Characterize_Darks_NewDAQ_FastCCD_NBC_New_Common_Mode.ipynb
index 33242e8d5c28f601d8d2069de5334c519f45f2c1..b72c092174d599bfd474bca09b7a3a48c6ebbbb8 100644
--- a/notebooks/FastCCD/Characterize_Darks_NewDAQ_FastCCD_NBC_New_Common_Mode.ipynb
+++ b/notebooks/FastCCD/Characterize_Darks_NewDAQ_FastCCD_NBC_New_Common_Mode.ipynb
@@ -41,7 +41,7 @@
     "\n",
     "number_dark_frames = 0 # number of images to be used, if set to 0 all available images are used\n",
     "# The two operation modes for FastCCD have fixed names which cannot be changed:\n",
-    "operation_mode = \"FF\" # FS stands for frame-store and FF for full-frame opeartion. \n",
+    "operation_mode = \"FF\"  # FS stands for frame-store and FF for full-frame operation. \n",
     "temp_limits = 5 # to find calibration constants later on, the sensor temperature is allowed to vary by 5 units\n",
     "bad_pixel_offset_sigma = 5. # Any pixel whose offset is beyond 5 standard deviations, is a bad pixel\n",
     "bad_pixel_noise_sigma = 5. # Any pixel whose noise is beyond 5 standard deviations, is a bad pixel\n",
@@ -60,8 +60,7 @@
     "ADU_to_electron_upper_mg = 23.4 # for upper hemisphere and medium gain\n",
     "ADU_to_electron_lower_mg = 23.4 # for lower hemisphere and medium gain\n",
     "ADU_to_electron_upper_lg = 49.3 # for upper hemisphere and low gain\n",
-    "ADU_to_electron_lower_lg = 47.3 # for lower hemisphere and low gain\n",
-    "operation_mode = ''  # Detector operation mode, optional"
+    "ADU_to_electron_lower_lg = 47.3 # for lower hemisphere and low gain\n"
    ]
   },
   {
diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index 00da5997abba27a77fd5aa54890785ae082ef76c..f1c6c165d58328b8be8e5fbc215825c9d6309f52 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -17,37 +17,35 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "cluster_profile = \"noDB\" # cluster profile to use\n",
-    "in_folder = \"/gpfs/exfel/exp/FXE/201901/p002210/raw\" # the folder to read data from, required\n",
-    "out_folder =  \"/gpfs/exfel/data/scratch/ahmedk/test/jf\"  # the folder to output to, required\n",
-    "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n",
-    "run = 249 # runs to process, required\n",
-    "\n",
-    "karabo_id = \"FXE_XAD_JF1M\" # karabo prefix of Jungfrau devices\n",
-    "karabo_da = ['JNGFR01'] # data aggregators\n",
-    "receiver_id = \"RECEIVER-{}\" # inset for receiver devices\n",
-    "receiver_control_id = \"CONTROL\" # inset for control devices\n",
-    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5'  # template to use for file name, double escape sequence number\n",
+    "in_folder = \"/gpfs/exfel/exp/CALLAB/202031/p900113/raw\"  # the folder to read data from, required\n",
+    "out_folder =  \"/gpfs/exfel/data/scratch/hammerd/issue-242\"  # the folder to output to, required\n",
+    "sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed\n",
+    "run = 9979  # run to process, required\n",
+    "\n",
+    "karabo_id = \"SPB_IRDA_JF4M\"  # karabo prefix of Jungfrau devices\n",
+    "karabo_da = ['JNGFR01']  # data aggregators\n",
+    "receiver_id = \"JNGFR{:02d}\"  # inset for receiver devices\n",
+    "receiver_control_id = \"CONTROL\"  # inset for control devices\n",
+    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5'  # template to use for file name\n",
     "h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located\n",
-    "h5path_run = '/RUN/{}/DET/{}' # path to run data\n",
-    "h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data\n",
+    "h5path_run = '/RUN/{}/DET/{}'  # path to run data\n",
+    "h5path_cntrl = '/CONTROL/{}/DET/{}'  # path to control data\n",
     "karabo_id_control = \"\"  # if control is on a different ID, set to empty string if it is the same as karabo-id\n",
-    "karabo_da_control = \"JNGFR01\" # file inset for control data\n",
+    "karabo_da_control = \"JNGFRCTRL00\"  # file inset for control data\n",
     "\n",
-    "use_dir_creation_date = True # use the creation data of the input dir for database queries\n",
-    "cal_db_interface = \"tcp://max-exfl016:8017#8025\" #\"tcp://max-exfl016:8015#8025\" # the database interface to use\n",
-    "cal_db_timeout = 180000 # timeout on caldb requests\",\n",
+    "use_dir_creation_date = True  # use the creation date of the input dir for database queries\n",
+    "cal_db_interface = \"tcp://max-exfl016:8017#8025\" # the database interface to use\n",
+    "cal_db_timeout = 180000  # timeout on caldb requests\n",
     "\n",
-    "overwrite = True # set to True if existing data should be overwritten\n",
-    "no_relative_gain = False # do not do relative gain correction\n",
-    "bias_voltage = 180 # will be overwritten by value in file\n",
-    "sequences_per_node = 5 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
-    "photon_energy = 9.2 # photon energy in keV\n",
+    "overwrite = True  # set to True if existing data should be overwritten\n",
+    "no_relative_gain = False  # do not do relative gain correction\n",
+    "bias_voltage = 180  # will be overwritten by value in file\n",
+    "sequences_per_node = 5  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
+    "photon_energy = 9.2  # photon energy in keV\n",
     "chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.\n",
-    "integration_time = 4.96 # integration time in us, will be overwritten by value in file\n",
-    "mem_cells = 0. # leave memory cells equal 0, as it is saved in control information starting 2019.\n",
-    "gmapfile = \"\" # variable is not used but left here for back compatibility\n",
-    "db_module = [\"Jungfrau_M233\"] # ID of module in calibration database\n",
+    "integration_time = 4.96  # integration time in us, will be overwritten by value in file\n",
+    "mem_cells = 0  # leave memory cells set to 0, as the value is saved in control information starting in 2019.\n",
+    "db_module = [\"Jungfrau_M275\"]  # ID of module in calibration database\n",
     "manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values\n",
     "chunk_size = 0\n",
     "\n",
@@ -62,23 +60,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import copy\n",
+    "import multiprocessing\n",
     "import time\n",
-    "from ipyparallel import Client\n",
+    "import warnings\n",
     "from functools import partial\n",
-    "import tabulate\n",
-    "from IPython.display import display, Latex\n",
-    "import copy\n",
+    "from pathlib import Path\n",
+    "\n",
     "import h5py\n",
-    "import os\n",
-    "from cal_tools.tools import (map_modules_from_folder, get_dir_creation_date,\n",
-    "                             get_constant_from_db_and_time)\n",
-    "from iCalibrationDB import (ConstantMetaData, Constants, Conditions, Detectors, Versions)\n",
-    "from cal_tools.enums import BadPixels\n",
-    "import numpy as np\n",
     "import matplotlib\n",
     "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import tabulate\n",
+    "from cal_tools.enums import BadPixels\n",
+    "from cal_tools.tools import (get_constant_from_db_and_time,\n",
+    "                             get_dir_creation_date, map_modules_from_folder)\n",
+    "from iCalibrationDB import Conditions, Constants\n",
+    "from IPython.display import Latex, display\n",
     "from matplotlib.colors import LogNorm\n",
-    "import warnings\n",
+    "\n",
     "warnings.filterwarnings('ignore')\n",
     "\n",
     "matplotlib.use('agg')\n",
@@ -91,23 +91,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "client = Client(profile=cluster_profile)\n",
-    "view = client[:]\n",
-    "view.use_dill()\n",
-    "\n",
+    "in_folder = Path(in_folder)\n",
+    "out_folder = Path(out_folder)\n",
+    "ped_dir = in_folder / f'r{run:04d}'\n",
     "h5path = h5path.format(karabo_id, receiver_id)\n",
-    "ped_dir = \"{}/r{:04d}\".format(in_folder, run)\n",
     "\n",
-    "if ped_dir[-1] == \"/\":\n",
-    "    ped_dir = ped_dir[:-1]\n",
-    "\n",
-    "if not os.path.exists(out_folder):\n",
-    "    os.makedirs(out_folder)\n",
-    "elif not overwrite:\n",
+    "if out_folder.exists() and not overwrite:\n",
     "    raise AttributeError(\"Output path exists! Exiting\")\n",
+    "else:\n",
+    "    out_folder.mkdir(parents=True, exist_ok=True)\n",
     "\n",
     "fp_name_contr = path_template.format(run, karabo_da_control, 0)\n",
-    "fp_path_contr = '{}/{}'.format(ped_dir, fp_name_contr)\n",
+    "fp_path_contr = ped_dir / fp_name_contr\n",
     "\n",
     "if sequences[0] == -1:\n",
     "    sequences = None\n",
@@ -131,7 +126,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def check_memoryCells(file_name, path):\n",
+    "def check_memory_cells(file_name, path):\n",
     "    with h5py.File(file_name, 'r') as f:\n",
     "        t_stamp = np.array(f[path + '/storageCells/timestamp'])\n",
     "        st_cells = np.array(f[path + '/storageCells/value'])\n",
@@ -151,8 +146,9 @@
    "outputs": [],
    "source": [
     "# set everything up filewise\n",
-    "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n",
-    "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf\n",
+    "mapped_files, mod_ids, total_sequences, sequences_qm, _ = map_modules_from_folder(\n",
+    "    in_folder, run, path_template, karabo_da, sequences\n",
+    ")\n",
     "\n",
     "print(f\"Processing a total of {total_sequences} sequence files\")\n",
     "table = []\n",
@@ -169,8 +165,9 @@
     "                                         headers=[\"#\", \"module\", \"# module\", \"file\"])))\n",
     "\n",
     "# restore the queue\n",
-    "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n",
-    "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf"
+    "mapped_files, mod_ids, total_sequences, sequences_qm, _ = map_modules_from_folder(\n",
+    "    in_folder, run, path_template, karabo_da, sequences\n",
+    ")"
    ]
   },
   {
@@ -180,7 +177,7 @@
    "outputs": [],
    "source": [
     "if not manual_slow_data:\n",
-    "    with h5py.File(fp_path_contr.format(0), 'r') as f:\n",
+    "    with h5py.File(fp_path_contr, 'r') as f:\n",
     "        run_path = h5path_run.format(karabo_id_control, receiver_control_id)\n",
     "        integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)\n",
     "        bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])\n",
@@ -188,26 +185,26 @@
     "\n",
     "control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)\n",
     "try:\n",
-    "    this_run_mcells, sc_start = check_memoryCells(fp_path_contr.format(0), control_path)\n",
+    "    this_run_mcells, sc_start = check_memory_cells(fp_path_contr, control_path)\n",
     "    if this_run_mcells == 1:\n",
-    "        memoryCells = 1\n",
+    "        memory_cells = 1\n",
     "        print(f'Dark runs in single cell mode\\n storage cell start: {sc_start:02d}')\n",
     "    else:\n",
-    "        memoryCells = 16\n",
+    "        memory_cells = 16\n",
     "        print(f'Dark runs in burst mode\\n storage cell start: {sc_start:02d}')\n",
     "except Exception as e:\n",
     "    if \"Unable to open object\" in str(e):\n",
     "        if mem_cells==0:\n",
-    "            memoryCells = 1\n",
+    "            memory_cells = 1\n",
     "        else:\n",
-    "            memoryCells = mem_cells\n",
-    "        print(f'Set memory cells to {memoryCells} as it is not saved in control information.')\n",
+    "            memory_cells = mem_cells\n",
+    "        print(f'Set memory cells to {memory_cells} as it is not saved in control information.')\n",
     "    else:\n",
     "        print(f\"Error trying to access memory cell from control information: {e}\")\n",
     "\n",
     "print(f\"Integration time is {integration_time} us\")\n",
     "print(f\"Bias voltage is {bias_voltage} V\")\n",
-    "print(f\"Number of memory cells is {memoryCells}\")"
+    "print(f\"Number of memory cells is {memory_cells}\")"
    ]
   },
   {
@@ -216,70 +213,63 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "condition = Conditions.Dark.jungfrau(memory_cells=memoryCells,\n",
-    "                                     bias_voltage=bias_voltage,\n",
-    "                                     integration_time=integration_time)\n",
+    "condition = Conditions.Dark.jungfrau(\n",
+    "    memory_cells=memory_cells,\n",
+    "    bias_voltage=bias_voltage,\n",
+    "    integration_time=integration_time,\n",
+    ")\n",
     "\n",
-    "def get_constant_for_module(karabo_id, condition, cal_db_interface, creation_time, cal_db_timeout,\n",
-    "                            memoryCells, karabo_da):\n",
+    "def get_constants_for_module(karabo_da: str):\n",
     "    \"\"\" Get calibration constants for given module of Jungfrau\n",
-    "    \n",
-    "    Function contains all includes to be used with ipCluster\n",
     "\n",
-    "    :param condition: Calibration condition\n",
-    "    :param cal_db_interface: Interface string, e.g. \"tcp://max-exfl016:8015\"\n",
-    "    :param creation_time: Latest time for constant to be created\n",
-    "    :param cal_db_timeout: Timeout for zmq request\n",
-    "    :param: memoryCells: Number of used memory cells\n",
-    "    :param: db_module: Module of Jungfrau, e.g. \"Jungfrau_M035\"\n",
-    "\n",
-    "    :return: offset_map (offset map), mask (mask of bad pixels), \n",
-    "        gain_map (map of relative gain factors), db_module (name of DB module), \n",
+    "    :return:\n",
+    "        offset_map (offset map),\n",
+    "        mask (mask of bad pixels),\n",
+    "        gain_map (map of relative gain factors),\n",
+    "        db_module (name of DB module),\n",
     "        when (dictionary: constant - creation time)\n",
     "    \"\"\"\n",
-    "\n",
-    "    from iCalibrationDB import (ConstantMetaData, Constants, Conditions, Detectors, Versions)\n",
-    "    from cal_tools.tools import get_constant_from_db_and_time\n",
-    "    import numpy as np\n",
     "    \n",
     "    when = {}\n",
-    "\n",
-    "    #TODO: Remove condition + constant retrieval duplication from notebook \n",
-    "\n",
-    "    offset_map, when['Offset'] = \\\n",
-    "        get_constant_from_db_and_time(karabo_id, karabo_da,\n",
-    "                                      Constants.jungfrau.Offset(),\n",
-    "                                      condition,\n",
-    "                                      np.zeros((1024, 512, 1, 3)),\n",
-    "                                      cal_db_interface,\n",
-    "                                      creation_time=creation_time,\n",
-    "                                      timeout=cal_db_timeout)\n",
-    "    mask, when['BadPixels'] = \\\n",
-    "        get_constant_from_db_and_time(karabo_id, karabo_da,\n",
-    "                                      Constants.jungfrau.BadPixelsDark(),\n",
-    "                                      condition,\n",
-    "                                      np.zeros((1024, 512, 1, 3)),\n",
-    "                                      cal_db_interface,\n",
-    "                                      creation_time=creation_time,\n",
-    "                                      timeout=cal_db_timeout)\n",
-    "    gain_map, when['Gain'] = \\\n",
-    "        get_constant_from_db_and_time(karabo_id, karabo_da,\n",
-    "                                      Constants.jungfrau.RelativeGain(),\n",
-    "                                      condition,\n",
-    "                                      None,\n",
-    "                                      cal_db_interface,\n",
-    "                                      creation_time=creation_time,\n",
-    "                                      timeout=cal_db_timeout)\n",
-    "\n",
+    "    retrieval_function = partial(\n",
+    "        get_constant_from_db_and_time,\n",
+    "        karabo_id=karabo_id,\n",
+    "        karabo_da=karabo_da,\n",
+    "        condition=condition,\n",
+    "        cal_db_interface=cal_db_interface,\n",
+    "        creation_time=creation_time,\n",
+    "        timeout=cal_db_timeout,\n",
+    "    )\n",
+    "    offset_map, when[\"Offset\"] = retrieval_function(\n",
+    "        constant=Constants.jungfrau.Offset(), empty_constant=np.zeros((1024, 512, 1, 3))\n",
+    "    )\n",
+    "    mask, when[\"BadPixelsDark\"] = retrieval_function(\n",
+    "        constant=Constants.jungfrau.BadPixelsDark(),\n",
+    "        empty_constant=np.zeros((1024, 512, 1, 3)),\n",
+    "    )\n",
+    "    mask_ff, when[\"BadPixelsFF\"] = retrieval_function(\n",
+    "        constant=Constants.jungfrau.BadPixelsFF(),\n",
+    "        empty_constant=None\n",
+    "    )\n",
+    "    gain_map, when[\"Gain\"] = retrieval_function(\n",
+    "        constant=Constants.jungfrau.RelativeGain(),\n",
+    "        empty_constant=None\n",
+    "    )\n",
+    "\n",
+    "    # combine masks\n",
+    "    if mask_ff is not None:\n",
+    "        mask |= np.moveaxis(mask_ff, 0, 1)\n",
+    "    \n",
     "    # move from x,y,cell,gain to cell,x,y,gain\n",
     "    offset_map = np.squeeze(offset_map)\n",
     "    mask = np.squeeze(mask)\n",
-    "    if memoryCells > 1:\n",
+    "    \n",
+    "    if memory_cells > 1:\n",
     "        offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)\n",
     "        mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)\n",
     "\n",
     "    if gain_map is not None:\n",
-    "        if memoryCells > 1:\n",
+    "        if memory_cells > 1:\n",
     "            gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)\n",
     "        else:\n",
     "            gain_map = np.squeeze(gain_map)\n",
@@ -288,24 +278,19 @@
     "    return offset_map, mask, gain_map, karabo_da, when\n",
     "\n",
     "\n",
-    "# Retrieve Offset, BadPixels and gain constants for a JungFrau module.\n",
-    "# Run ip Cluster parallelization over modules\n",
-    "p = partial(get_constant_for_module, karabo_id, condition, cal_db_interface, \n",
-    "            creation_time, cal_db_timeout, memoryCells)\n",
-    "\n",
-    "r = view.map_sync(p, karabo_da)\n",
-    "#r = list(map(p, karabo_da))\n",
+    "with multiprocessing.Pool() as pool:\n",
+    "    r = pool.map(get_constants_for_module, karabo_da)\n",
     "\n",
     "constants = {}\n",
-    "for rr in r:\n",
-    "    offset_map, mask, gain_map, k_da, when = rr\n",
+    "for offset_map, mask, gain_map, k_da, when in r:\n",
     "    print(f'Constants for module {k_da}:')\n",
     "    for const in when:\n",
-    "        print(f'{const} injected at {when[const]}')\n",
+    "        print(f'  {const} injected at {when[const]}')\n",
+    "        \n",
     "    if gain_map is None:\n",
-    "        print(\"No gain map found\")\n",
+    "        print(\"  No gain map found\")\n",
     "        no_relative_gain = True\n",
-    "    \n",
+    "        \n",
     "    constants[k_da] = (offset_map, mask, gain_map)"
    ]
   },
@@ -319,11 +304,9 @@
     "    \"\"\" Copy and sanitize data in `infile` that is not touched by `correctLPD`\n",
     "    \"\"\"\n",
     "\n",
-    "    if h5base.startswith(\"/\"):\n",
-    "        h5base = h5base[1:]\n",
+    "    h5base = h5base.lstrip(\"/\")\n",
     "    dont_copy = [\"adc\", ]\n",
-    "    dont_copy = [h5base+\"/{}\".format(do)\n",
-    "                 for do in dont_copy]\n",
+    "    dont_copy = [f'{h5base}/{dnc}' for dnc in dont_copy]\n",
     "\n",
     "    def visitor(k, item):\n",
     "        if k not in dont_copy:\n",
@@ -344,11 +327,7 @@
    "outputs": [],
    "source": [
     "# Correct a chunk of images for offset and gain\n",
-    "def correct_chunk(offset_map, mask, gain_map, memoryCells, no_relative_gain, inp):\n",
-    "    import numpy as np\n",
-    "    import copy\n",
-    "    import h5py\n",
-    "\n",
+    "def correct_chunk(offset_map, mask, gain_map, memory_cells, no_relative_gain, inp):\n",
     "    fim_data = None\n",
     "    gim_data = None\n",
     "    rim_data = None\n",
@@ -360,13 +339,13 @@
     "        g[g==3] = 2\n",
     "\n",
     "        if copy_sample and ind==0:\n",
-    "            if memoryCells==1:\n",
+    "            if memory_cells==1:\n",
     "                rim_data = np.squeeze(copy.copy(d))\n",
     "            else:\n",
     "                rim_data = np.squeeze(copy.copy(d[:,0,...]))\n",
     "\n",
     "        # Select memory cells\n",
-    "        if memoryCells>1:\n",
+    "        if memory_cells>1:\n",
     "            m[m>16] = 0\n",
     "            offset_map_cell = offset_map[m,...]\n",
     "            mask_cell = mask[m,...]\n",
@@ -380,7 +359,7 @@
     "\n",
     "        # Gain correction\n",
     "        if not no_relative_gain:\n",
-    "            if memoryCells>1:\n",
+    "            if memory_cells>1:\n",
     "                gain_map_cell = gain_map[m,...]\n",
     "            else:\n",
     "                gain_map_cell = gain_map\n",
@@ -391,7 +370,7 @@
     "\n",
     "        # Store sample of data for plotting\n",
     "        if copy_sample and ind==0:\n",
-    "            if memoryCells==1:\n",
+    "            if memory_cells==1:\n",
     "                fim_data = np.squeeze(copy.copy(d))\n",
     "                gim_data = np.squeeze(copy.copy(g))\n",
     "                msk_data = np.squeeze(copy.copy(msk))\n",
@@ -403,30 +382,37 @@
     "    except Exception as e:\n",
     "        err = e\n",
     "\n",
-    "    return ind, d, msk, rim_data, fim_data, gim_data, msk_data, err\n",
-    "\n",
+    "    return ind, d, msk, rim_data, fim_data, gim_data, msk_data, err"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "fim_data = {}\n",
     "gim_data = {}\n",
     "rim_data = {}\n",
     "msk_data = {}\n",
     "\n",
+    "# For each module, chunks will be processed by pool\n",
+    "pool = multiprocessing.Pool()\n",
     "# Loop over modules\n",
-    "for i, key in enumerate(mapped_files):\n",
+    "for local_karabo_da, mapped_files_module in zip(karabo_da, mapped_files.values()):\n",
+    "    h5path_f = h5path.format(int(local_karabo_da[-2:]))\n",
     "    # Loop over sequences for given module\n",
-    "    for k, f in enumerate(list(mapped_files[key].queue)):\n",
-    "        \n",
-    "        offset_map, mask, gain_map = constants[karabo_da[i]]\n",
-    "        h5path_f = h5path.format(int(karabo_da[i][-2:]))\n",
+    "    for sequence_file_number, sequence_file in enumerate(mapped_files_module.queue):\n",
+    "        sequence_file = Path(sequence_file)\n",
+    "        offset_map, mask, gain_map = constants[local_karabo_da]\n",
     "                                 \n",
-    "        with h5py.File(f, 'r') as infile:\n",
-    "            \n",
+    "        with h5py.File(sequence_file, 'r') as infile:\n",
     "            # The processed files are saved here in a folder with the run name.\n",
-    "            out_file = \"{}/{}\".format(out_folder, f.split(\"/\")[-1])\n",
-    "            out_file = out_file.replace(\"RAW\", \"CORR\")\n",
-    "            print(f'Process file: {f}, with path {h5path_f}')\n",
+    "            out_filename = out_folder / sequence_file.name.replace(\"RAW\", \"CORR\")\n",
+    "            print(f'Process file: {sequence_file}, with path {h5path_f}')\n",
     "            try:\n",
-    "                with h5py.File(out_file, \"w\") as ofile:\n",
-    "                    copy_and_sanitize_non_cal_data(infile, ofile, h5path_f)\n",
+    "                with h5py.File(out_filename, \"w\") as outfile:\n",
+    "                    copy_and_sanitize_non_cal_data(infile, outfile, h5path_f)\n",
     "\n",
     "                    oshape = infile[h5path_f+\"/adc\"].shape\n",
     "                    print(f'Data shape: {oshape}')\n",
@@ -434,18 +420,18 @@
     "                        raise ValueError(f\"No image data: shape {oshape}\")\n",
     "                    # Chunk always contains >= 1 complete image\n",
     "                    chunk_shape = (chunk_size_idim, 1) + oshape[-2:]\n",
-    "                    ddset = ofile.create_dataset(h5path_f+\"/adc\",\n",
-    "                                                 oshape,\n",
-    "                                                 chunks=chunk_shape,\n",
-    "                                                 dtype=np.float32)\n",
-    "\n",
-    "                    mskset = ofile.create_dataset(h5path_f+\"/mask\",\n",
-    "                                                  oshape,\n",
-    "                                                  chunks=chunk_shape,\n",
-    "                                                  dtype=np.uint32,\n",
-    "                                                  compression=\"gzip\", compression_opts=1, shuffle=True)\n",
-    "\n",
-    "                    # Run ip Cluster parallelization over chunks of images\n",
+    "\n",
+    "                    ddset = outfile.create_dataset(h5path_f+\"/adc\",\n",
+    "                                                   oshape,\n",
+    "                                                   chunks=chunk_shape,\n",
+    "                                                   dtype=np.float32)\n",
+    "\n",
+    "                    mskset = outfile.create_dataset(h5path_f+\"/mask\",\n",
+    "                                                    oshape,\n",
+    "                                                    chunks=chunk_shape,\n",
+    "                                                    dtype=np.uint32,\n",
+    "                                                    compression=\"gzip\", compression_opts=1, shuffle=True)\n",
+    "                    # Parallelize over chunks of images\n",
     "                    inp = []\n",
     "                    max_ind = oshape[0]\n",
     "                    ind = 0\n",
@@ -464,23 +450,21 @@
     "                        else:\n",
     "                            m = None\n",
     "                        print(f'To process: {d.shape}')\n",
-    "                        inp.append((d,g,m, ind, k==0))\n",
+    "                        inp.append((d, g, m, ind, sequence_file_number==0))\n",
     "                        ind += chunk_size\n",
     "\n",
     "                    print('Preparation time: ', time.time() - ts)\n",
     "                    ts = time.time()\n",
     "\n",
     "                    print(f'Run {len(inp)} processes')\n",
-    "                    p = partial(correct_chunk, offset_map, mask, gain_map, memoryCells, no_relative_gain)\n",
+    "                    p = partial(correct_chunk, offset_map, mask, gain_map, memory_cells, no_relative_gain)\n",
     "\n",
-    "                    r = view.map_sync(p, inp)\n",
-    "                    # Used for debugging correct chunk\n",
-    "                    #r = list(map(p, inp))\n",
+    "                    r = pool.map(p, inp)\n",
     "                    \n",
-    "                    if k==0:\n",
+    "                    if sequence_file_number == 0:\n",
     "                        (_,_,_,\n",
-    "                         rim_data[karabo_da[i]], fim_data[karabo_da[i]],\n",
-    "                         gim_data[karabo_da[i]], msk_data[karabo_da[i]], _) = r[0]\n",
+    "                         rim_data[local_karabo_da], fim_data[local_karabo_da],\n",
+    "                         gim_data[local_karabo_da], msk_data[local_karabo_da], _) = r[0]\n",
     "\n",
     "                    print('Correction time: ', time.time() - ts)\n",
     "                    ts = time.time()\n",
@@ -495,7 +479,8 @@
     "\n",
     "                    print('Saving time: ', time.time() - ts)\n",
     "            except Exception as e:\n",
-    "                print(f\"Error: {e}\")"
+    "                print(f\"Error: {e}\")\n",
+    "pool.close()"
    ]
   },
   {
@@ -513,8 +498,7 @@
     "    ax.set_ylabel(y_axis)\n",
     "    ax.set_title(title)\n",
     "    cb = fig.colorbar(im)\n",
-    "    cb.set_label(\"Counts\")\n",
-    "    "
+    "    cb.set_label(\"Counts\")"
    ]
   },
   {
@@ -524,8 +508,10 @@
    "outputs": [],
    "source": [
     "for mod in rim_data: \n",
-    "    h, ex, ey = np.histogram2d(rim_data[mod].flatten(), gim_data[mod].flatten(),\n",
-    "                               bins=[100, 4], range=[[0, 10000], [0,4]])\n",
+    "    h, ex, ey = np.histogram2d(rim_data[mod].flatten(),\n",
+    "                               gim_data[mod].flatten(),\n",
+    "                               bins=[100, 4],\n",
+    "                               range=[[0, 10000], [0, 4]])\n",
     "    do_2d_plot(h, (ex, ey), \"Signal (ADU)\", \"Gain Bit Value\", f'Module {mod}')"
    ]
   },
diff --git a/notebooks/LPD/playground/correct_lpd_batch.py_old b/notebooks/LPD/playground/correct_lpd_batch.py_old
index 304a7ab7fbcad0ddacc663ebe1efb5c2d399e486..ef76d8d32c0bcde5bef3dbe04676a82942f03745 100644
--- a/notebooks/LPD/playground/correct_lpd_batch.py_old
+++ b/notebooks/LPD/playground/correct_lpd_batch.py_old
@@ -45,7 +45,7 @@ elif not overwrite:
 def combine_stack(d, sdim):
     combined = np.zeros((sdim, 2048,2048))
     combined[...] = np.nan
-    
+
     map_x = [1,0,0,1]
     map_y = [1,1,0,0]
     to_map = d
@@ -97,7 +97,7 @@ saveFile.close()
 
 # set everything up filewise
 from queue import Queue
-    
+
 def map_modules_from_files(filelist):
     module_files = {}
     mod_ids = {}
@@ -111,7 +111,7 @@ def map_modules_from_files(filelist):
             for file in filelist:
                 if file_infix in file:
                     module_files[name].put(file)
-                
+
     return module_files, mod_ids
 
 dirlist = os.listdir(in_folder)
@@ -120,14 +120,14 @@ for entry in dirlist:
     #only h5 file
     abs_entry = "{}/{}".format(in_folder, entry)
     if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":
-        
+
         if sequences is None:
             file_list.append(abs_entry)
         else:
             for seq in sequences:
                 if "{:05d}.h5".format(seq) in abs_entry:
                     file_list.append(os.path.abspath(abs_entry))
-                    
+
 mapped_files, mod_ids = map_modules_from_files(file_list)
 
 import copy
@@ -136,7 +136,7 @@ def correct_module(cells, inp):
     import numpy as np
     import copy
     import h5py
-    
+
     def splitOffGainLPD(d):
         msk = np.zeros(d.shape, np.uint16)
         msk[...] = 0b0000111111111111
@@ -145,9 +145,9 @@ def correct_module(cells, inp):
         gain = np.bitwise_and(d, msk)//4096
         gain[gain > 2] = 2
         return data, gain
-    
+
     if True:
-    
+
         filename, filename_out, channel, offset, rel_gain = inp
 
         infile = h5py.File(filename, "r", driver="core")
@@ -176,34 +176,34 @@ def correct_module(cells, inp):
 
         im, gain = splitOffGainLPD(im[:,0,...])
         im = im.astype(np.float32)
-        
+
         im = np.rollaxis(im, 2)
         im = np.rollaxis(im, 2, 1)
 
         gain = np.rollaxis(gain, 2)
         gain = np.rollaxis(gain, 2, 1)
 
-      
+
 
         om = offset
         rc = rel_gain
 
         for cc in range(im.shape[2]//cells):
             tg = gain[...,cc*cells:(cc+1)*cells]
-            
+
             offset = np.choose(tg, (om[...,0], om[...,1], om[...,2]))
             if rc is not None:
                 rel_cor = np.choose(tg, (rc[...,0], rc[...,1], rc[...,2]))
             tim = im[...,cc*cells:(cc+1)*cells]
             tim = tim - offset
-            if rc is not None:      
+            if rc is not None:
                 tim /= rel_cor
             im[...,cc*cells:(cc+1)*cells] = tim
 
         outfile["INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data".format(channel)] = np.rollaxis(np.rollaxis(im,1), 2)
         outfile["INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/gain".format(channel)] = np.rollaxis(np.rollaxis(gain,1), 2)
         outfile.close()
-    
+
 
 done = False
 first_files = []
@@ -228,7 +228,7 @@ while not done:
                     rel_gains[i][...,:max_cells,:] if do_rel_gain else None))
     first = False
     p = partial(correct_module, max_cells)
-    
+
     r = view.map_sync(p, inp)
     done = all(dones)
 
@@ -239,7 +239,7 @@ for i, ff in enumerate(first_files):
     try:
         rf, cf = ff
         if rf is not None:
-        
+
             infile = h5py.File(rf, "r")
             raw.append(np.array(infile["/INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data".format(i)][max_cells*3:4*max_cells,0,...]))
             infile.close()
@@ -250,7 +250,7 @@ for i, ff in enumerate(first_files):
             infile.close()
         else:
             raise Exception("File not found")
-        
+
     except Exception as e:
         corrected.append(np.zeros((max_cells, 256, 256)))
         raw.append(np.zeros((max_cells, 256, 256)))
diff --git a/notebooks/LPD/playground/metroLib.py b/notebooks/LPD/playground/metroLib.py
index 0ef031095e14ec7bae0cd1b1b42497635f82194a..222a706f68a2d6b92aca4700a338413b09239ab3 100644
--- a/notebooks/LPD/playground/metroLib.py
+++ b/notebooks/LPD/playground/metroLib.py
@@ -7,28 +7,28 @@ from matplotlib import pylab as plt
 
 
 def getModulePosition(metrologyFile, moduleId):
-    """Position (in mm) of a module relative to the top left 
+    """Position (in mm) of a module relative to the top left
     corner of its quadrant. In case of tile-level positions,
-    the the position refers to the center of the top left 
+    the position refers to the center of the top left
     pixel.
-    
+
     Args
     ----
-    
+
     metrologyFile : str
         Fully qualified path and filename of the metrology file
     moduleId : str
         Identifier of the module in question (e.g. 'Q1M2T03')
-        
+
     Returns
     -------
-    
-    ndarray: 
+
+    ndarray:
         (x, y)-Position of the module in its quadrant
-    
+
     Raises
     ------
-    
+
     ValueError: In case the moduleId contains invalid module
         identifieres
     """
@@ -38,11 +38,11 @@ def getModulePosition(metrologyFile, moduleId):
     #
     #   QXMYTZZ
     #
-    # where X, Y, and Z are digits. Q denotes the quadrant 
-    # (X = 1, ..., 4), M the supermodule (Y = 1, ..., 4) and T 
+    # where X, Y, and Z are digits. Q denotes the quadrant
+    # (X = 1, ..., 4), M the supermodule (Y = 1, ..., 4) and T
     # the tile (Z = 1, ..., 16; with leading zeros).
     modulePattern = re.compile(r'[QMT]\d+')
-    # Give the module identifier Q1M1T01, the moduleList splits this 
+    # Give the module identifier Q1M1T01, the moduleList splits this
     # into the associated quadrant, supermodule, and tile identifiers:
     # >>> print(moduleList)
     # ['Q1', 'M1', 'T01']
@@ -53,7 +53,7 @@ def getModulePosition(metrologyFile, moduleId):
     # >>> print(h5Keys)
     # ['Q1', 'Q1/M1', 'Q1/M1/T01']
     h5Keys = ['/'.join(moduleList[:idx+1]) for idx in range(len(moduleList))]
-    
+
     # Every module of the detector gives its position relative to
     # the top left corner of its parent structure. Every position
     # is stored in the positions array
@@ -83,17 +83,17 @@ def getModulePosition(metrologyFile, moduleId):
 def translateToModuleBL(tilePositions):
     """Tile coordinates within a supermodule with the
     origin in the bottom left corner.
-    
+
     Parameters
     ----------
-    
+
     tilePositions : ndarray
-        Tile positions as retrieved from the LPD metrology 
+        Tile positions as retrieved from the LPD metrology
         file. Must have shape (16, 2)
-        
+
     Returns
     -------
-    
+
     ndarray
         Tile positions relative to the bottom left corner.
     """
@@ -115,7 +115,7 @@ def translateToModuleBL(tilePositions):
     # In the clockwise order of LPD tiles, the 8th
     # tile in the list is the bottom left tile
     bottomLeft8th = np.asarray([0., moduleCoords[8][1]])
-    # Translate coordinates to the bottom left corner 
+    # Translate coordinates to the bottom left corner
     # of the bottom left tile
     bottomLeft = moduleCoords - bottomLeft8th
     return bottomLeft
@@ -124,44 +124,44 @@ def translateToModuleBL(tilePositions):
 def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6000.):
     """Plots data of a supermodule with tile positions
     determined by the metrology data.
-    
+
     Parameters
     ----------
-    
+
     tileData : ndarray
-        Supermodule image data separated in individual tiles. 
+        Supermodule image data separated in individual tiles.
         Must have shape (16, 32, 128).
-        
-    metrologyPositions : ndarray 
-        Tile positions as retrieved from the metrology file. 
+
+    metrologyPositions : ndarray
+        Tile positions as retrieved from the metrology file.
         Must have shape (16, 2)
-        
+
     zoom : float, optional
         Can enlarge or decrease the size of the plot. Default = 1.
-        
+
     vmin, vmax : float, optional
         Value range. Default vmin=100., vmax=6000.
-        
+
     Returns
     -------
     matplotlib.Figure
-        Figure object containing the supermodule image        
+        Figure object containing the supermodule image
     """
     # Data needs to have 16 tiles, each with
     # 32x128 pixels
     assert tileData.shape == (16, 32, 128)
-    
+
     # Conversion coefficient, required since
     # matplotlib does its business in inches
     mmToInch = 1./25.4 # inch/mm
-    
+
     # Some constants
     numberOfTiles = 16
     numberOfRows = 8
     numberOfCols = 2
     tileWidth = 65.7 # in mm
     tileHeight = 17.7 # in mm
-    
+
     # Base width and height are given by spatial
     # extend of the modules. The constants 3.4 and 1
     # are estimated as a best guess for gaps between
@@ -169,26 +169,26 @@ def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6
     figureWidth = zoom * numberOfCols*(tileWidth + 3.4)*mmToInch
     figureHeight = zoom * numberOfRows*(tileHeight + 1.)*mmToInch
     fig = plt.figure(figsize=(figureWidth, figureHeight))
-    
-    # The metrology file references module positions 
+
+    # The metrology file references module positions
     bottomRightCornerCoordinates = translateToModuleBL(metrologyPositions)
-    
+
     # The offset here accounts for the fact that there
     # might be negative x,y values
     offset = np.asarray(
-        [min(bottomRightCornerCoordinates[:, 0]), 
+        [min(bottomRightCornerCoordinates[:, 0]),
          min(bottomRightCornerCoordinates[:, 1])]
     )
-    
+
     # Account for blank borders in the plot
     borderLeft = 0.5 * mmToInch
     borderBottom = 0.5 * mmToInch
-    
+
     # The height and width of the plot remain
     # constant for a given supermodule
     width = zoom * 65.7 * mmToInch / (figureWidth - 2.*borderLeft)
     height = zoom * 17.7 * mmToInch / (figureHeight - 2.*borderBottom)
-    
+
     for i in range(numberOfTiles):
         # This is the top left corner of the tile with
         # respect to the top left corner of the supermodule
@@ -200,38 +200,38 @@ def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6
         ax = fig.add_axes((ax0, ay0, width, height), frameon=False)
         # Do not display axes, tick markers or labels
         ax.tick_params(
-            axis='both', left='off', top='off', right='off', bottom='off', 
+            axis='both', left='off', top='off', right='off', bottom='off',
             labelleft='off', labeltop='off', labelright='off', labelbottom='off'
         )
         # Plot the image. If one wanted to have a colorbar
         # the img object would be needed to produce one
         img = ax.imshow(
-            tileData[i], 
-            interpolation='nearest', 
+            tileData[i],
+            interpolation='nearest',
             vmin=vmin, vmax=vmax
         )
-        
+
     return fig
 
 
 def splitChannelDataIntoTiles(channelData, clockwiseOrder=False):
     """Splits the raw channel data into indiviual tiles
-    
+
     Args
     ----
-    
+
     channelData : ndarray
         Raw channel data. Must have shape (256, 256)
-        
+
     clockwiseOrder : bool, optional
         If set to True, the sequence of tiles is given
         in the clockwise order starting with the top
         right tile (LPD standard). If set to false, tile
         data is returned in reading order
-        
+
     Returns
     -------
-    
+
     ndarray
         Same data, but reshaped into (12, 32, 128)
     """
@@ -240,8 +240,8 @@ def splitChannelDataIntoTiles(channelData, clockwiseOrder=False):
     orderedTiles = tiles.reshape(16, 32, 128)
     if clockwiseOrder:
         # Naturally, the tile data after splitting is in reading
-        # order (i.e. top left tile is first, top right tile is second, 
-        # etc.). The official LPD tile order however is clockwise, 
+        # order (i.e. top left tile is first, top right tile is second,
+        # etc.). The official LPD tile order however is clockwise,
         # starting with the top right tile. The following array
         # contains indices of tiles in reading order as they would
         # be iterated in clockwise order (starting from the top right)
@@ -253,22 +253,22 @@ def splitChannelDataIntoTiles(channelData, clockwiseOrder=False):
 
 def splitChannelDataIntoTiles2(channelData, clockwiseOrder=False):
     """Splits the raw channel data into indiviual tiles
-    
+
     Args
     ----
-    
+
     channelData : ndarray
         Raw channel data. Must have shape (256, 256)
-        
+
     clockwiseOrder : bool, optional
         If set to True, the sequence of tiles is given
         in the clockwise order starting with the top
         right tile (LPD standard). If set to false, tile
         data is returned in reading order
-        
+
     Returns
     -------
-    
+
     ndarray
         Same data, but reshaped into (12, 32, 128)
     """
@@ -277,8 +277,8 @@ def splitChannelDataIntoTiles2(channelData, clockwiseOrder=False):
     orderedTiles = np.moveaxis(tiles.reshape(16, 128, 32, channelData.shape[2]), 2, 1)
     if clockwiseOrder:
         # Naturally, the tile data after splitting is in reading
-        # order (i.e. top left tile is first, top right tile is second, 
-        # etc.). The official LPD tile order however is clockwise, 
+        # order (i.e. top left tile is first, top right tile is second,
+        # etc.). The official LPD tile order however is clockwise,
         # starting with the top right tile. The following array
         # contains indices of tiles in reading order as they would
         # be iterated in clockwise order (starting from the top right)
@@ -294,7 +294,7 @@ def returnPositioned2(geometry_file, modules, dquads):
     tile_order = [1, 2, 3, 4]
     cells = 0
     for sm, mn in modules:
-        
+
         position = np.asarray([getModulePosition(geometry_file,
                                                  'Q{}/M{:d}/T{:02d}'.format(
                                                      sm//4+1,
@@ -357,7 +357,7 @@ def positionFileList(filelist, datapath, geometry_file, quad_pos, nImages='all')
     data = {}
     for file in files:
         ch = int(re.findall(r'.*-{}([0-9]+)-.*'.format(detector), file)[0])
-        
+
         try:
             with h5py.File(file, 'r', driver='core') as f:
                 d = np.squeeze(f[datapath.format(ch)][()] if nImages == 'all' else f[datapath.format(ch)][:nImages,:,:])
diff --git a/notebooks/pnCCD/Characterize_pnCCD_Gain.ipynb b/notebooks/pnCCD/Characterize_pnCCD_Gain.ipynb
index 4c6a9340026e64e924a6cca47705b28b283a6c5a..f76e5d7058da86af23a9e7c8adcfbace38f5e238 100644
--- a/notebooks/pnCCD/Characterize_pnCCD_Gain.ipynb
+++ b/notebooks/pnCCD/Characterize_pnCCD_Gain.ipynb
@@ -1209,12 +1209,11 @@
     "                                    condition, cal_db_interface,\n",
     "                                    snapshot_at=creation_time)[0]\n",
     "    if db_output:\n",
-    "        try:\n",
-    "            md = send_to_db(db_module, karabo_da, const, condition,\n",
-    "                            file_loc, report,\n",
-    "                            cal_db_interface,\n",
-    "                            creation_time=creation_time,\n",
-    "                            timeout=cal_db_timeout)\n",
+    "        md = send_to_db(db_module, karabo_id, const, condition,\n",
+    "                        file_loc, report,\n",
+    "                        cal_db_interface,\n",
+    "                        creation_time=creation_time,\n",
+    "                        timeout=cal_db_timeout)\n",
     "        \n",
     "    if local_output:\n",
     "        md = save_const_to_h5(db_module, karabo_id, \n",
@@ -1225,7 +1224,7 @@
     "        \n",
     "print(\"\\nGenerated constants with conditions:\\n\")\n",
     "print(f\"• bias_voltage: {bias_voltage}\\n• photon_energy: {photon_energy}\\n\"\n",
-    "      f\"• top_temperature: {fix_temperature_top}\\n• top_temperature: {integration_time}\\n\"\n",
+    "      f\"• top_temperature: {fix_temperature_top}\\n• integration_time: {integration_time}\\n\"\n",
     "      f\"• gain_setting: {gain}\\n• creation_time: {creation_time}\\n\")"
    ]
   },
diff --git a/notebooks/pnCCD/frm6reader.py b/notebooks/pnCCD/frm6reader.py
index a0539603fbd1c64534475daff511d39ffc20dea7..d853a9cadd5e715c4ed21ccbdd4d1bb065317697 100644
--- a/notebooks/pnCCD/frm6reader.py
+++ b/notebooks/pnCCD/frm6reader.py
@@ -331,4 +331,3 @@ class Frms6Reader(object):
             numberOfFrames = int(numberOfFrames)
 
         return (frameWidth, frameHeight, numberOfFrames)
-
diff --git a/reportservice/README.md b/reportservice/README.md
index a4e6da44a3fe67bf39f6e0622de0cd1614314d1b..ccf7f91bab188ec10bffa1638a4fa61b9238aa4c 100644
--- a/reportservice/README.md
+++ b/reportservice/README.md
@@ -4,7 +4,7 @@ Offline Calibration Reportservice
 The Reportserivce is a service responsible for handling requests (manual or automatic triggers)
 for generating the DetectorCharacterization reports based on the requested configurations.
 
-The Reportservice mainly consists of a service, clients and YAML configuration. 
+The Reportservice mainly consists of a service, clients and YAML configuration.
 The service keeps on listening to any ZMQ requests with a given configurations.
 Then based on these configurations, it produces slurm jobs (through xfel-calibrate command line) to generate *.png plots of calibration configurations over time.
 
@@ -39,7 +39,7 @@ and it should generate a very generalized DC report for the available detectors
 
 *local* is the mode used for generating figures locally without uploading the DC report on RTD or pushing figures
 to the git repository, rather generated figures are copied to the local repository and depending on the
-given report-fmt(report format) argument an html or a pdf is generated in doc/_build/ 
+given report-fmt (report format) argument, an HTML or a PDF is generated in doc/_build/
 of the report service out folder (repo-local).
 
 *sim* is a simulation mode, which is mostly used for debugging purposes and tool development without generating any reports locally or over RTD.
@@ -116,9 +116,9 @@ Automatic Launch:
 Manual Launch:
 
     This manual launch script is currently used for debugging purposes, only.
-    
+
     The available command line arguments are:
-    
+
 * --config-file: The path for the configuration file
 * --instrument: A selected list of instruments to generate a report for. This instrument must be in the report_conf.yaml. The default for this argument is ['all]
 * --overwrite-conf: A bool for indicating a new report configuration file(conf-file) should be sent instead of the default report_conf.yaml,
diff --git a/reportservice/automatic_run.py b/reportservice/automatic_run.py
index 708037153f3b84f3d821f5d0814d24973b797504..dc3b7156416ba9d57428d1f7c4f42a8b55a2bc96 100644
--- a/reportservice/automatic_run.py
+++ b/reportservice/automatic_run.py
@@ -53,13 +53,13 @@ async def auto_run(cfg, timeout=3000):
 
             tidx = tidx + 1 if tidx != len(run_time)-1 else 0
 
-        # check every 10mins, if there is 
+        # check every 10mins, if there is
         # a need for an automatic-run.
         await asyncio.sleep(3000)
 
 
 arg_parser = argparse.ArgumentParser(description='Automatic Launch')
-arg_parser.add_argument('--config-file', type=str, 
+arg_parser.add_argument('--config-file', type=str,
                         default='./report_conf.yaml',
                         help='config file path with reportservice port. '
                              'Default=./report_conf.yaml')
diff --git a/reportservice/manual_run.py b/reportservice/manual_run.py
index caa04fcf65a2a3e0449c30ef1d1de8b58c8f48b7..df7ab40f7bf1d7c8323ad021c83ce19d59a35e28 100644
--- a/reportservice/manual_run.py
+++ b/reportservice/manual_run.py
@@ -60,7 +60,7 @@ arg_parser.add_argument('--report-fmt', default='html',
                              'Note: THIS HAS NO EFFECT IN PROD AND SIM MODES!')
 arg_parser.add_argument('--log-file', type=str, default='./report.log',
                         help='The report log file path. Default=./report.log')
-arg_parser.add_argument('--logging', type=str, default="INFO", 
+arg_parser.add_argument('--logging', type=str, default="INFO",
                         help='logging modes: INFO, DEBUG or ERROR. '
                              'Default=INFO',
                         choices=['INFO', 'DEBUG', 'ERROR'])
diff --git a/reportservice/messages.py b/reportservice/messages.py
index 901795b5c8bcaafc84bc8b4ec2e53cd3d19ef830..a4723c1628d4522ec661a87ea2c7863cce9af50b 100644
--- a/reportservice/messages.py
+++ b/reportservice/messages.py
@@ -1,5 +1,3 @@
 class Errors:
     REQUEST_MALFORMED = "FAILED: request {} is malformed, please contact det-support@xfel.eu"
     INSTRUMENT_NOT_FOUND = "FAILED: Instrument {} is not known!, please contact det-support@xfel.eu"
-
-
diff --git a/reportservice/report_conf.yaml b/reportservice/report_conf.yaml
index 4a6ade68ea54fa052ecd838da768b0b26d79734c..aa6a59202e7c7bde890d1bdb816e56fdffe7ad79 100644
--- a/reportservice/report_conf.yaml
+++ b/reportservice/report_conf.yaml
@@ -638,5 +638,3 @@ HED:
         out-folder: "/gpfs/exfel/data/scratch/xcal/report_service/tmp/{instrument}/{detector}/"
         cal-db-timeout: 180000
         cal-db-interface: "tcp://max-exfl016:8015#8025"
-
-
diff --git a/reportservice/report_service.py b/reportservice/report_service.py
index 03d24a9a6d0606920df0668ab4eaf8dae650e09d..4e6859f7e990a481f4a16b5dab10a06745de0b9f 100644
--- a/reportservice/report_service.py
+++ b/reportservice/report_service.py
@@ -293,7 +293,7 @@ async def server_runner(conf_file, mode):
                     try:
                         output = await asyncio.create_subprocess_shell(
                                  " ".join(run_base), stdout=PIPE, stderr=PIPE)
-                        
+
                         launched_jobs.append(output.communicate())
 
                         logging.info('Submission information: {}:'
@@ -402,7 +402,7 @@ async def server_runner(conf_file, mode):
             break
 
 arg_parser = argparse.ArgumentParser(description='Start the report service')
-arg_parser.add_argument('--config-file', type=str, 
+arg_parser.add_argument('--config-file', type=str,
                         default='./report_conf.yaml',
                         help='config file path with '
                              'reportservice port. '
diff --git a/requirements.txt b/requirements.txt
index 199e14468d4b2977690218251f9dcb09aaa5c9dc..d66d4c5d2dbe422bc19b65d443ea9ebe450681b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,10 @@
-git+file:///gpfs/exfel/sw/calsoft/git/cal_db_interactive@2.0.0
+git+file:///gpfs/exfel/sw/calsoft/git/cal_db_interactive@2.0.1
 git+file:///gpfs/exfel/sw/calsoft/git/nbparameterise@0.3
 git+file:///gpfs/exfel/sw/calsoft/git/pyDetLib@2.5.6-2.10.0#subdirectory=lib
+Cython == 0.29.21
+Jinja2 == 2.11.2
 astcheck == 0.2.5
 astsearch == 0.1.3
-Cython == 0.29.21
 dill == 0.3.0
 extra_data == 1.2.0
 extra_geom == 1.1.1
@@ -11,22 +12,22 @@ fabio == 0.9.0
 gitpython == 3.1.0
 h5py == 2.10.0
 iminuit == 1.3.8
-ipyparallel == 6.2.4
 ipykernel == 5.1.4
+ipyparallel == 6.2.4
 ipython == 7.12.0
 ipython_genutils == 0.2.0
-Jinja2 == 2.11.2
+jupyter-core == 4.6.1
 jupyter_client == 6.1.7
 jupyter_console == 6.1.0
-jupyter-core == 4.6.1
 karabo_data == 0.7.0
 lxml == 4.5.0
 metadata_client == 3.0.8
 nbclient == 0.5.1
 nbconvert == 5.6.1
 nbformat == 5.0.7
-notebook == 6.1.5 
+notebook == 6.1.5
 numpy == 1.19.1
+pre-commit == 2.10.0
 prettytable == 0.7.2
 princess == 0.2
 pypandoc == 1.4
diff --git a/setup.py b/setup.py
index 58b1c9ce10dbb518dd11c7af2bada148433de433..fc9ead08dfc7c9a90354c5bcebc689759ebd36ec 100644
--- a/setup.py
+++ b/setup.py
@@ -68,7 +68,7 @@ setup(
         'xfel_calibrate': ['bin/*.sh'] + data_files + ['titlepage.tmpl',
                                                        'xfel.pdf']
     },
-    
+
     cmdclass={
         'build' : PreInstallCommand,
         'install': PostInstallCommand,
@@ -81,10 +81,9 @@ setup(
     description='',
     entry_points = {
               'console_scripts': [
-                  'xfel-calibrate = xfel_calibrate.calibrate:run',                  
-              ],              
+                  'xfel-calibrate = xfel_calibrate.calibrate:run',
+              ],
           },
     ext_modules=extensions
-    
-)
 
+)
diff --git a/tests/legacy/correction_base.py b/tests/legacy/correction_base.py
index 86d32b605a94384aac40b45337e38e4482833aa0..831fa9478c9c62e49ae8b345201de3ded0ad1157 100644
--- a/tests/legacy/correction_base.py
+++ b/tests/legacy/correction_base.py
@@ -283,7 +283,7 @@ class CorrectionTestBase:
 
         print("Executing {}".format(" ".join(cmd)))
 
-        
+
         print("Creating data paths for artefacts")
         cls.artefact_dir = get_artefact_dir(cls)
         if not os.path.exists(cls.artefact_dir):
@@ -333,10 +333,10 @@ class CorrectionTestBase:
         """
         out_folder = self._output_to_path()
         files_to_check = glob.glob(
-            "{}/*{}".format(out_folder, self.rel_file_ext))  
-        
+            "{}/*{}".format(out_folder, self.rel_file_ext))
+
         for fname in files_to_check:
-            
+
             with h5py.File(fname, "r") as f:
                 d = {}
                 def visitor(k, item):
@@ -344,20 +344,20 @@ class CorrectionTestBase:
                         d[k] = item.fletcher32
 
                 f.visititems(visitor)
-                
+
                 chkfname = "{}.checksum".format(fname)
                 chkpath = "{}/{}".format(self.artefact_dir,
                                          os.path.basename(chkfname))
                 with open(chkpath, 'wb') as fc:
-                    pickle.dump(d, fc, pickle.HIGHEST_PROTOCOL) 
+                    pickle.dump(d, fc, pickle.HIGHEST_PROTOCOL)
 
     @unittest.skipIf(args.skip_checksum,
                      "User requested to skip checksum test")
     def test_checksums(self):
         """ Compare Fletcher32 checksums of notebook's output with artefacts
 
-        This test will verify if datasets with checksums are identical. 
-        Even for small changes in the correction logic this test is likely 
+        This test will verify if datasets with checksums are identical.
+        Even for small changes in the correction logic this test is likely
         to fail.
         If this is the case, it is recommended to verify correctness using
         the other tests, which inspect data, and the create new checksums
@@ -379,12 +379,12 @@ class CorrectionTestBase:
                                 "No comparison checksums found")
             with open(chkpath, 'rb') as fc:
                 d = pickle.load(fc)
-                
+
                 with h5py.File(fname, "r") as f:
- 
+
                     def visitor(k, item):
                         if isinstance(item, h5py.Dataset):
-                            
+
                             msg = "Verify checksum of: {}".format(k)
                             with self.subTest(msg=msg):
                                 self.assertIn(k, d)
@@ -578,7 +578,7 @@ class CorrectionTestBase:
 
             _, last_train = rd.train_from_id(rd.train_ids[-1])
             test_train_info(last_train, "last_train")
-            
+
     @unittest.skipIf(args.skip_karabo_data,
                      "User requested to skip karabo data test")
     def test_karabo_data_self_test(self):
@@ -592,7 +592,7 @@ class CorrectionTestBase:
                      "User requested to skip report generation test")
     def test_report_gen(self):
         """ Verify expected reports are generated
-        
+
         Also verifies that no additional reports are present, and copies
         the report to the artefact dir.
         """
@@ -610,4 +610,3 @@ class CorrectionTestBase:
         pdfs = glob.glob("{}/*.pdf".format(out_folder))
         for pdf in pdfs:
             self.assertIn(os.path.basename(pdf), self.expected_reports)
-                
diff --git a/tests/legacy/readme.md b/tests/legacy/readme.md
index 060f9898283312292a14c51c6d0da0c14cc00ea5..f5d8619b309f4a65beea4f035aea94df8ee40d0c 100644
--- a/tests/legacy/readme.md
+++ b/tests/legacy/readme.md
@@ -4,4 +4,3 @@ They are broken and haven't been looked at. Some may be fixed, some are deprecat
 This directory is excluded from the CI runner.
 
 It does not mean that they can be freely deleted. Each test file should be assayed and fixed, if possible!
-
diff --git a/tests/test_calibrate.py b/tests/test_calibrate.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cc48d61b9ea84a98aae7e34da80a246612af605
--- /dev/null
+++ b/tests/test_calibrate.py
@@ -0,0 +1,35 @@
+import pytest
+
+from xfel_calibrate.calibrate import balance_sequences
+
+
+def test_balance_sequences():
+
+    ret = balance_sequences(in_folder="/gpfs/exfel/exp/CALLAB/202031/p900113/raw",  # noqa
+                            run=9992, sequences=[0, 2, 5, 10, 20, 50, 100],
+                            sequences_per_node=1, karabo_da=["all"],
+                            max_nodes=8)
+
+    expected = [[0], [2]]
+    assert expected == ret
+
+    ret = balance_sequences(in_folder="/gpfs/exfel/exp/CALLAB/202031/p900113/raw",  # noqa
+                            run=9992, sequences=[-1],
+                            sequences_per_node=1, karabo_da=["JNGFR01"],
+                            max_nodes=3)
+    expected = []
+    assert expected == ret
+
+    with pytest.raises(ValueError) as e:
+        balance_sequences(in_folder="/gpfs/exfel/exp/CALLAB/202031/p900113/raw",  # noqa
+                          run=9992, sequences=[1991, 2021],
+                          sequences_per_node=1, karabo_da=["all"],
+                          max_nodes=3)
+    assert 'Selected sequences [1991, 2021]]' in str(e.value)
+
+    with pytest.raises(ValueError) as e:
+        balance_sequences(in_folder="/gpfs/exfel/exp/CALLAB/202031/p900113/raw",  # noqa
+                          run=9992, sequences=[1991, 2021],
+                          sequences_per_node=1, karabo_da=-1,
+                          max_nodes=3)
+    assert 'karabo_da as a string or list' in str(e.value)
diff --git a/tests/test_webservice.py b/tests/test_webservice.py
index 1497d9e201c385c54c076ccd17d5e234a7db8786..ff368afe247b878a0e61cafac1c746beae829848 100644
--- a/tests/test_webservice.py
+++ b/tests/test_webservice.py
@@ -1,10 +1,13 @@
 import sys
 from pathlib import Path
+from unittest import mock
 
 import pytest
+from testpath import MockCommand
 
 sys.path.insert(0, Path(__file__).parent / 'webservice')
-from webservice.webservice import check_files  # noqa
+from webservice.webservice import (check_files, merge, parse_config,
+                                   wait_on_transfer)
 
 
 def test_check_files():
@@ -23,3 +26,54 @@ def test_check_files():
     with pytest.raises(PermissionError):
         in_folder = '/gpfs/maxwell/home/achilles'  # arbitrarily chosen
         check_files(in_folder, runs, karabo_das)
+
+
+def test_merge():
+    a = {'some': {'key': {'akey': 'avalue', 'number': 1}}}
+    b = {'some': {'key': {'anotherkey': 'anothervalue', 'number': 5}},
+         'completely': 'different'}
+
+    ret = merge(a, b)
+    expected = {'some': {'key': {'akey': 'avalue',
+                                 'anotherkey': 'anothervalue',
+                                 'number': 1}},
+                'completely': 'different'}
+    assert ret == expected
+
+
+def test_parse_config():
+    cmd = ['whatever']
+    config = {'somebool': True,
+              'notsomebool': False,
+              'alist': [1, 2, 3],
+              'some_empty_key': '""',
+              'other_empty_key': "''",
+              'brian': 'scone'}
+
+    expected = ['whatever', '--somebool', '--alist', '1', '2', '3',
+                '--some_empty_key', '', '--other_empty_key', '',
+                '--brian', 'scone']
+
+    config = parse_config(cmd, config)
+
+    assert config == expected
+    assert '--notsomebool' not in config
+
+    with pytest.raises(ValueError):
+        config = {'some key': 'value'}
+        config = parse_config(cmd, config)
+
+    with pytest.raises(ValueError):
+        config = {'somekey': 'a value'}
+        config = parse_config(cmd, config)
+
+
+@pytest.mark.asyncio
+async def test_wait_on_transfer(tmp_path):
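+    # Mock the getfattr command so it reports user.status="dCache" for the folder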
+    mock_getfattr = MockCommand(
+        'getfattr',
+        content="""#!{}\nprint('user.status="dCache"')""".format(sys.executable)
+    )
+    with mock_getfattr:
+        res = await wait_on_transfer(str(tmp_path), max_tries=1)
+    assert res is True
diff --git a/webservice/README.md b/webservice/README.md
index 501e7f9c522eee42904d37209b09d3b91235ec98..fef79ac81a6c2c76b30755fff4dd1d6a2bdfb485 100644
--- a/webservice/README.md
+++ b/webservice/README.md
@@ -2,7 +2,7 @@ Offline Calibration Webservice
 ==============================
 
 The offline calibration webservice interacts with the Metadata Catalogue (MDC),
-such that migration of data to the offline cluster automatically triggers 
+such that migration of data to the offline cluster automatically triggers
 calibration jobs on relevant files.
 
 Installation
@@ -18,7 +18,7 @@ The service needs to be installed under a functional user account which
 * has write permission to the *proc* folders for outputting corrected data
 * is allowed to launch SLURM jobs on the cluster
 
-The hosting system needs to be accessible via ZMQ calls from the MDC. 
+The hosting system needs to be accessible via ZMQ calls from the MDC.
 This requires appropriate DMZ settings. Additionally, it needs to be able
 to interact with the MDC via the MDC client interface
 
@@ -32,10 +32,10 @@ Additionally, the *xfel-calibrate* environment needs to be installed:
    ``` bash
    git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git .
    ```
-   
+
 2. pick the python environment to install into. On Maxwell the anaconda/3
    environment will work:
-   
+
    ``` bash
    module load anaconda/3
    ```
@@ -48,7 +48,7 @@ Additionally, the *xfel-calibrate* environment needs to be installed:
 
 4. some correction notebooks require pyDetLib. It requires manual installation in
    a non-Karabo python environment
-   
+
    ``` bash
    mkdir pydetlib
    cd pydetlib
@@ -57,19 +57,19 @@ Additionally, the *xfel-calibrate* environment needs to be installed:
    pip install --user pycuda
    pip install --user ./lib/
    cd ..
-   
+
 5. install the separate requirements for the webservice:
 
    ``` bash
    cd webservice
    pip install --user -r requirements.txt
    ```
-   
+
 6. install the metadata_client library, according to instructions at
 
    https://git.xfel.eu/gitlab/ITDM/metadata_client
-   
-   
+
+
 You are now good to go.
 
 Configuration
@@ -84,7 +84,7 @@ In the **config-repo** section, the configuration repository needs to be configu
 config-repo:
     url: https://git.xfel.eu/gitlab/detectors/calibration_configurations.git
     local-path: /home/haufs/calibration_config/
-``` 
+```
 Here you should prepend the *url* entry with a gitlab access token, that provides access
 to the calibration_configurations repository.
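+
+For example, with placeholder values (the exact user name depends on the type of access token):
+
+``` YAML
+config-repo:
+    url: https://<token-name>:<access-token>@git.xfel.eu/gitlab/detectors/calibration_configurations.git
+    local-path: /home/<username>/calibration_config/
+```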
 
@@ -108,9 +108,9 @@ In the **metadata-client** section, the client interface to the MDC is configure
 ``` YAML
 
 metadata-client:
-    user-id: 
-    user-secret: 
-    user-email: 
+    user-id:
+    user-secret:
+    user-email:
     metadata-web-app-url: 'https://in.xfel.eu/metadata'
     metadata-web-app-url: 'https://in.xfel.eu/metadata'
     token-url: 'https://in.xfel.eu/metadata/oauth/token'
@@ -153,5 +153,5 @@ Use
    ``` bash
    python webservice.py --help
    ```
-  
-to display a list of available options.
\ No newline at end of file
+
+to display a list of available options.
diff --git a/webservice/manual_launch.py b/webservice/manual_launch.py
index 2006d842941d4d3e326df627f2de614c5899cbff..4753bbdafc2797cfc259355c92b8c1bf9598acf8 100644
--- a/webservice/manual_launch.py
+++ b/webservice/manual_launch.py
@@ -24,5 +24,3 @@ stuff = [action, dark_run_id, sase, instrument, cycle, proposal, 'SPB_DET_AGIPD1
 socket.send(str(stuff).encode())
 resp = socket.recv_multipart()[0]
 print(resp.decode())
-
-
diff --git a/webservice/messages.py b/webservice/messages.py
index 3f3255daa3510fe9333024996156344c84dc3369..957ebfe33836474a27ebf82f7bbd56e17acf3caa 100644
--- a/webservice/messages.py
+++ b/webservice/messages.py
@@ -13,6 +13,7 @@ class Errors:
     MDC_RESPONSE = "FAILED: Response error from MDC: {}"
     NOT_CONFIGURED = "FAILED: instrument not configured, please contact det-support@xfel.eu"
     NOT_SUBMITTED = "FAILED: correction of {} failed during submision, please contact det-support@xfel.eu"
+    OTHER_ERROR = "FAILED: Error {}, please contact det-support@xfel.eu"
 
 
 class MDC:
diff --git a/webservice/serve_overview.yaml b/webservice/serve_overview.yaml
index 14948daae12171a587e67b5643df5694393c5b95..dace52af08542e0bd6a1818bc5d24badba4aefef 100644
--- a/webservice/serve_overview.yaml
+++ b/webservice/serve_overview.yaml
@@ -19,9 +19,9 @@ shell-commands:
   cat-log: "cat web.log"
 
 run-candidates:
-    - "--run-high" 
+    - "--run-high"
     - "--run-med"
-    - "--run-low" 
+    - "--run-low"
     - "--run"
 
 server-config:
diff --git a/webservice/sqlite_view.py b/webservice/sqlite_view.py
index c1672721386a0a5024b12bc9f2a5fb3247b4d630..29670a19f3b02b710da98e713bf9f176e56809d4 100644
--- a/webservice/sqlite_view.py
+++ b/webservice/sqlite_view.py
@@ -24,4 +24,3 @@ for r in c.fetchall():
     rid, jobid, db_proposal, db_run, status, time, _, _ = r
     if db_proposal == proposal and db_run == run:
         print(r)
-        
diff --git a/webservice/templates/checkbox.html b/webservice/templates/checkbox.html
index 154128addd5ce626fed7589df1e37708469c963f..ea734cd7a2de793123a2251cff68a1989205425d 100644
--- a/webservice/templates/checkbox.html
+++ b/webservice/templates/checkbox.html
@@ -8,4 +8,4 @@
 {% for run_name in runs %}
     <label >{{run_name}}:</label>
     <input type="number" id="run{{loop.index}}" name="{{run_name}}" min="1" max="999999" size="4">
-{% endfor %}
\ No newline at end of file
+{% endfor %}
diff --git a/webservice/templates/log_output.html b/webservice/templates/log_output.html
index b5479747c5873a02673383c7c3c6be535b4eb3ec..8d70880e92045c981e7d8768a7bd21122a3b1f46 100644
--- a/webservice/templates/log_output.html
+++ b/webservice/templates/log_output.html
@@ -3,4 +3,4 @@
    <div class="log-out">
    {{ logout }}
    </div>
-</div>
\ No newline at end of file
+</div>
diff --git a/webservice/update_mdc.py b/webservice/update_mdc.py
index fd8d09e895490b5bbd418c9824c7e34f51285f7a..52750549997d7b45e938678fa45e68088bfff8be 100644
--- a/webservice/update_mdc.py
+++ b/webservice/update_mdc.py
@@ -39,4 +39,3 @@ if response.status_code == 200:
     print('Run is updated')
 else:
     print(f'Update failed {response}')
-
diff --git a/webservice/webservice.py b/webservice/webservice.py
index 3ec8b64fa50437e1828d6f4d153041c46ec2f426..0d1696e53e29fb83971c47cf4784062198834d93 100644
--- a/webservice/webservice.py
+++ b/webservice/webservice.py
@@ -3,17 +3,16 @@ import asyncio
 import copy
 import getpass
 import glob
+import inspect
 import json
 import logging
 import os
 import sqlite3
-import subprocess  # FIXME: use asyncio.create_subprocess_*
-import traceback
 import urllib.parse
 from asyncio import get_event_loop, shield
 from datetime import datetime
 from pathlib import Path
-from typing import List
+from typing import Any, Dict, List, Optional, Tuple
 
 import yaml
 import zmq
@@ -43,34 +42,38 @@ async def init_job_db(config):
     # https://pypi.org/project/databases/
     logging.info("Initializing database")
     conn = sqlite3.connect(config['web-service']['job-db'])
-    c = conn.cursor()
-    try:
-        c.execute("SELECT * FROM jobs")
-    except Exception:  # TODO: is it sqlite3.OperationalError?
-        logging.info("Creating initial job database")
-        c.execute("CREATE TABLE jobs(rid, jobid, proposal, run, status, time, det, act)") # noqa
+    conn.execute(
+        "CREATE TABLE IF NOT EXISTS "
+        "jobs(rid, jobid, proposal, run, status, time, det, act)"
+    )
     return conn
 
 
-async def init_md_client(config):
-    """ Initialize an MDC client connection
+async def init_md_client(config: Dict[str, Dict[str, str]]) -> MetadataClient:
+    """Initialize an MDC client connection.
 
     :param config: the configuration parsed from the webservice YAML config
     :return: an MDC client connection
     """
-    # FIXME: this blocks the even loop, should use asyncio.Task
-    # FIXME: calls to this coro should be shielded
     # TODO: could the client be a global? This would recuce passing it around
-    mdconf = config['metadata-client']
-    client_conn = MetadataClient(client_id=mdconf['user-id'],
-                                 client_secret=mdconf['user-secret'],
-                                 user_email=mdconf['user-email'],
-                                 token_url=mdconf['token-url'],
-                                 refresh_url=mdconf['refresh-url'],
-                                 auth_url=mdconf['auth-url'],
-                                 scope=mdconf['scope'],
-                                 base_api_url=mdconf['base-api-url'])
-    return client_conn
+
+    # The MetadataClient requests authentication from MyMDC during initialisation,
+    # so run the initialisation in an executor thread to avoid blocking the event loop.
+    def _init_client():
+        mdconf = config['metadata-client']
+        client_conn = MetadataClient(client_id=mdconf['user-id'],
+                                     client_secret=mdconf['user-secret'],
+                                     user_email=mdconf['user-email'],
+                                     token_url=mdconf['token-url'],
+                                     refresh_url=mdconf['refresh-url'],
+                                     auth_url=mdconf['auth-url'],
+                                     scope=mdconf['scope'],
+                                     base_api_url=mdconf['base-api-url'])
+        return client_conn
+
+    loop = get_event_loop()
+    client = await shield(loop.run_in_executor(None, _init_client))
+    return client
 
 
 def init_config_repo(config):
@@ -92,10 +95,9 @@ def init_config_repo(config):
     logging.info("Config repo is initialized")
 
 
-async def upload_config(socket, config, yaml, instrument, cycle, proposal):
+async def upload_config(config, yaml, instrument, cycle, proposal) -> bytes:
     """ Upload a new configuration YAML
 
-    :param socket: ZMQ socket to send reply on
     :param config: the configuration defined in the `config-repo` section
         of the webservice.yaml configuration.
     :param yaml: the YAML contents to update
@@ -125,12 +127,11 @@ async def upload_config(socket, config, yaml, instrument, cycle, proposal):
                                             datetime.now().isoformat()))
     repo.remote().push()
     logging.info(Success.UPLOADED_CONFIG.format(cycle, proposal))
-    socket.send(Success.UPLOADED_CONFIG.format(cycle, proposal).encode())
+    return Success.UPLOADED_CONFIG.format(cycle, proposal).encode()
 
 
-def merge(source, destination):
-    """
-    Deep merge two dictionaries
+def merge(source: Dict, destination: Dict) -> Dict:
+    """Deep merge two dictionaries.
 
     :param source: source dictionary to merge into destination
     :param destination: destination dictionary which is being merged in
@@ -154,8 +155,8 @@ def merge(source, destination):
     return destination
 
 
-async def change_config(socket, config, updated_config, karabo_id, instrument,
-                        cycle, proposal, apply=False):
+def change_config(config, updated_config, karabo_id, instrument,
+                  cycle, proposal, apply=False) -> bytes:
     """
     Change the configuration of a proposal
 
@@ -164,7 +165,6 @@ async def change_config(socket, config, updated_config, karabo_id, instrument,
 
     Changes are committed to git.
 
-    :param socket: ZMQ socket to send reply on
     :param config: repo config as given in YAML config file
     :param updated_config: a dictionary containing the updated config
     :param instrument: the instrument to change config for
@@ -204,7 +204,17 @@ async def change_config(socket, config, updated_config, karabo_id, instrument,
             "Update to proposal YAML: {}".format(datetime.now().isoformat()))
         repo.remote().push()
     logging.info(Success.UPLOADED_CONFIG.format(cycle, proposal))
-    socket.send(yaml.dump(new_conf, default_flow_style=False).encode())
+    return yaml.dump(new_conf, default_flow_style=False).encode()
+
+
+async def run_proc_async(cmd: List[str]) -> Tuple[int, bytes]:
+    """Run a subprocess to completion using asyncio, capturing stdout
+
+    Returns the numeric exit code and stdout (bytes)
+    """
+    proc = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE)
+    stdout, _ = await proc.communicate()
+    return proc.returncode, stdout
 
 
 async def slurm_status(filter_user=True):
@@ -217,9 +227,9 @@ async def slurm_status(filter_user=True):
     cmd = ["squeue"]
     if filter_user:
         cmd += ["-u", getpass.getuser()]
-    ret = subprocess.run(cmd, stdout=subprocess.PIPE)  # FIXME: asyncio
-    if ret.returncode == 0:
-        rlines = ret.stdout.decode().split("\n")
+    retcode, stdout = await run_proc_async(cmd)
+    if retcode == 0:
+        rlines = stdout.decode().split("\n")
         statii = {}
         for r in rlines[1:]:
             try:
@@ -240,9 +250,9 @@ async def slurm_job_status(jobid):
     """
     cmd = ["sacct", "-j", str(jobid), "--format=JobID,Elapsed,state"]
 
-    ret = subprocess.run(cmd, stdout=subprocess.PIPE)  # FIXME: asyncio
-    if ret.returncode == 0:
-        rlines = ret.stdout.decode().split("\n")
+    retcode, stdout = await run_proc_async(cmd)
+    if retcode == 0:
+        rlines = stdout.decode().split("\n")
 
         logging.debug("Job {} state {}".format(jobid, rlines[2].split()))
         if len(rlines[2].split()) == 3:
@@ -250,9 +260,9 @@ async def slurm_job_status(jobid):
     return "NA", "NA", "NA"
 
 
-async def query_rid(conn, socket, rid):
+def query_rid(conn, rid) -> bytes:
     c = conn.cursor()
-    c.execute("SELECT * FROM jobs WHERE rid LIKE '{}'".format(rid))
+    c.execute("SELECT * FROM jobs WHERE rid LIKE ?", rid)
     combined = {}
     for r in c.fetchall():
         rid, jobid, proposal, run, status, time_, _ = r
@@ -281,19 +291,30 @@ async def query_rid(conn, socket, rid):
         msg += "\n".join(statii)
     if msg == "":
         msg = 'NA'
-    socket.send(msg.encode())
+    return msg.encode()
+
 
+def parse_config(cmd: List[str], config: Dict[str, Any]) -> List[str]:
+    """Convert a dictionary to a list of arguments.
 
-def parse_config(cmd, config):
+       Values that are not strings will be cast to strings.
+       Lists will be converted to several strings following their `--key`
+       flag.
+       Booleans will be converted to a `--key` flag, where `key` is the
+       dictionary key.
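+
+       Illustrative example (the flag names below are hypothetical):
+           parse_config(['xfel-calibrate'], {'modules': [0, 1], 'no-relgain': True, 'priority': 2})
+           # -> ['xfel-calibrate', '--modules', '0', '1', '--no-relgain', '--priority', '2']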
+    """
     for key, value in config.items():
+        if ' ' in key or (isinstance(value, str) and ' ' in value):
+            raise ValueError('Spaces are not allowed', key, value)
+
         if isinstance(value, list):
-            cmd += ["--{}".format(key)]
+            cmd.append(f"--{key}")
             cmd += [str(v) for v in value]
         elif isinstance(value, bool):
             if value:
                 cmd += ["--{}".format(key)]
         else:
-            if value == '""' or value == "''":
+            if value in ['""', "''"]:
                 value = ""
             cmd += ["--{}".format(key), str(value)]
 
@@ -327,37 +348,40 @@ async def update_job_db(config):
                 rid, jobid, proposal, run, status, time, _, action = r
                 logging.debug("DB info {}".format(r))
 
-                cflg, cstatus = combined.get(rid, ([], []))
+                cflg, cstatus = combined.get((rid, action), ([], []))
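+                # 'combined' accumulates job flags and status strings per (run id, action) pair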
                 if jobid in statii:
                     slstatus, runtime = statii[jobid]
-                    query = "UPDATE jobs SET status='{status}', time='{runtime}' WHERE jobid LIKE '{jobid}'"  # noqa
-                    c.execute(query.format(status=slstatus,
-                                           runtime=runtime,
-                                           jobid=jobid))
+                    query = "UPDATE jobs SET status=?, time=? WHERE jobid LIKE ?"
+                    c.execute(query, (slstatus, runtime, jobid))
 
                     cflg.append('R')
                     cstatus.append("{}-{}".format(slstatus, runtime))
                 else:
                     _, sltime, slstatus = await slurm_job_status(jobid)
-                    query = "UPDATE jobs SET status='{slstatus}' WHERE jobid LIKE '{jobid}'"  # noqa
-                    c.execute(query.format(jobid=jobid, slstatus=slstatus))
+                    query = "UPDATE jobs SET status=? WHERE jobid LIKE ?"
+                    c.execute(query, (slstatus, jobid))
 
                     if slstatus == 'COMPLETED':
                         cflg.append("A")
                     else:
                         cflg.append("NA")
                     cstatus.append(slstatus)
-                combined[rid] = cflg, cstatus
+                combined[rid, action] = cflg, cstatus
             conn.commit()
 
             flg_order = {"R": 2, "A": 1, "NA": 0}
             dark_flags = {'NA': 'E', 'R': 'IP', 'A': 'F'}
-            for rid, value in combined.items():
+            for rid, action in combined:
                 if int(rid) == 0:  # this job was not submitted from MyMDC
                     continue
-                flgs, statii = value
+                flgs, statii = combined[rid, action]
                 # sort by least done status
                 flg = max(flgs, key=lambda i: flg_order[i])
+                if flg != 'R':
+                    logging.info(
+                        "Jobs finished - action: %s, run id: %s, status: %s",
+                        action, rid, flg,
+                    )
                 msg = "\n".join(statii)
                 msg_debug = f"Update MDC {rid}, {msg}"
                 logging.debug(msg_debug.replace('\n', ', '))
@@ -370,10 +394,11 @@ async def update_job_db(config):
                                          'calcat_feedback': msg}}
                     response = mdc.update_dark_run_api(rid, data)
                 if response.status_code != 200:
+                    logging.error("Failed to update MDC for action %s, rid %s",
+                                  action, rid)
                     logging.error(Errors.MDC_RESPONSE.format(response))
-        except Exception as e:
-            e = str(e)
-            logging.error(f"Failure to update job DB: {e}")
+        except Exception:
+            logging.error(f"Failure to update job DB", exc_info=True)
 
         await asyncio.sleep(time_interval)
 
@@ -395,7 +420,7 @@ async def copy_untouched_files(file_list, out_folder, run):
         logging.info("Copying {} to {}".format(f, of))
 
 
-async def run_action(job_db, cmd, mode, proposal, run, rid):
+async def run_action(job_db, cmd, mode, proposal, run, rid) -> str:
     """ Run action command (CORRECT OR DARK)
 
     :param job_db: jobs database
@@ -404,7 +429,7 @@ async def run_action(job_db, cmd, mode, proposal, run, rid):
                  but the command will be logged
     :param proposal: proposal the command was issued for
     :param run: run the command was issued for
-    :param: rid: run id in the MDC
+    :param rid: run id in the MDC
 
     Returns a formatted Success or Error message indicating outcome of the
     execution.
@@ -412,24 +437,24 @@ async def run_action(job_db, cmd, mode, proposal, run, rid):
     # FIXME: this coro has too many returns that can be simplified
     if mode == "prod":
         logging.info(" ".join(cmd))
-        ret = subprocess.run(cmd, stdout=subprocess.PIPE)  # FIXME: asyncio
-        if ret.returncode == 0:
+        retcode, stdout = await run_proc_async(cmd)
+        if retcode == 0:
             if "DARK" in cmd:
                 logging.info(Success.START_CHAR.format(proposal, run))
             else:
                 logging.info(Success.START_CORRECTION.format(proposal, run))
             # enter jobs in job db
             c = job_db.cursor()  # FIXME: asyncio
-            rstr = ret.stdout.decode()
+            rstr = stdout.decode()
 
-            query = "INSERT INTO jobs VALUES ('{rid}', '{jobid}', '{proposal}', '{run}', 'PD', '{now}', '{det}', '{act}')"  # noqa
             for r in rstr.split("\n"):
                 if "Submitted job:" in r:
                     _, jobid = r.split(":")
-                    c.execute(query.format(rid=rid, jobid=jobid.strip(),
-                                           proposal=proposal, run=run,
-                                           now=datetime.now().isoformat(),
-                                           det=cmd[3], act=cmd[4]))
+                    c.execute(
+                        "INSERT INTO jobs VALUES (?, ?, ?, ?, 'PD', ?, ?, ?)",
+                        (rid, jobid.strip(), proposal, run,
+                         datetime.now().isoformat(), cmd[3], cmd[4])
+                    )
             job_db.commit()
             logging.debug((" ".join(cmd)).replace(',', '').replace("'", ""))
             if "DARK" in cmd:
@@ -437,8 +462,8 @@ async def run_action(job_db, cmd, mode, proposal, run, rid):
             else:
                 return Success.START_CORRECTION.format(proposal, run)
         else:
-            logging.error(Errors.JOB_LAUNCH_FAILED.format(cmd, ret.returncode))
-            return Errors.JOB_LAUNCH_FAILED.format(cmd, ret.returncode)
+            logging.error(Errors.JOB_LAUNCH_FAILED.format(cmd, retcode))
+            return Errors.JOB_LAUNCH_FAILED.format(cmd, retcode)
 
     else:
         if "DARK" in cmd:
@@ -453,7 +478,7 @@ async def run_action(job_db, cmd, mode, proposal, run, rid):
             return Success.START_CORRECTION_SIM.format(proposal, run)
 
 
-async def wait_on_transfer(rpath, max_tries=300):
+async def wait_on_transfer(rpath, max_tries=300) -> bool:
     """
     Wait on data files to be transferred to Maxwell
 
@@ -466,8 +491,6 @@ async def wait_on_transfer(rpath, max_tries=300):
     # check the copy is finished (ie. that the files are complete).
     if 'pnfs' in os.path.realpath(rpath):
         return True
-    rstr = None
-    ret = None
     tries = 0
 
     # FIXME: if not kafka, then do event-driven, no sleep
@@ -480,16 +503,44 @@ async def wait_on_transfer(rpath, max_tries=300):
 
     # FIXME: if not kafka, then do event-driven, no sleep
     # wait until files are migrated
-    while rstr is None or 'status="online"' in rstr or 'status="Online"' in rstr or ret.returncode != 0:  # noqa
-        await asyncio.sleep(10)
-        # FIXME: make use of asyncio.subprocess.run
-        ret = subprocess.run(["getfattr", "-n", "user.status", rpath],
-                             stdout=subprocess.PIPE)
-        rstr = ret.stdout.decode()
+    while True:
+        retcode, stdout = await run_proc_async([
+            "getfattr", "-n", "user.status", rpath
+        ])
+        if retcode == 0 and 'status="online"' not in stdout.decode().lower():
+            return True
         if tries > max_tries:
             return False
         tries += 1
-    return ret.returncode == 0
+        await asyncio.sleep(10)
+
+
+async def wait_transfers(
+        wait_runs: List[str], in_folder: str, proposal: str
+) -> bool:
+    """Wait for multiple runs to be transferred to Maxwell.
+
+    :param wait_runs: Run numbers to wait for
+    :param in_folder: Proposal raw directory containing runs
+    :param proposal: Proposal number
+    :return: True if all runs transferred, False on timeout
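+
+    Illustrative call (the paths and run numbers are hypothetical):
+        ok = await wait_transfers(['100', '101'], '/gpfs/exfel/exp/SPB/202022/p002222/raw', '002222')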
+    """
+    logging.debug("Waiting for: propsal %s, runs %s", proposal, wait_runs)
+
+    # FIXME: this loop should be an asyncio.gather
+    for runnr in wait_runs:
+        rpath = "{}/r{:04d}/".format(in_folder, int(runnr))
+        transfer_complete = await wait_on_transfer(rpath)
+        if not transfer_complete:
+            logging.error(
+                Errors.TRANSFER_EVAL_FAILED.format(proposal, runnr)
+            )
+            return False
+
+    logging.info(
+        "Transfer complete: proposal %s, runs %s", proposal, wait_runs
+    )
+    return True
 
 
 def check_files(in_folder: str,
@@ -534,6 +585,7 @@ async def update_darks_paths(mdc: MetadataClient, rid: int, in_path: str,
     response = await shield(loop.run_in_executor(None, mdc.update_dark_run_api,
                                                  rid, data))
     if response.status_code != 200:
+        logging.error("Failed to update MDC dark report path for run id %s", rid)
         logging.error(Errors.MDC_RESPONSE.format(response))
 
 
@@ -565,441 +617,561 @@ async def update_mdc_status(mdc: MetadataClient, action: str,
         func = mdc.update_run_api
         data = {'flg_cal_data_status': flag, 'cal_pipeline_reply': message}
 
-    if action == 'dark_request':
+    elif action == 'dark_request':
         func = mdc.update_dark_run_api
         data = {'dark_run': {'flg_status': flag, 'calcat_feedback': message}}
+    else:
+        raise ValueError(f"Unexpected action: {action}")
 
     loop = get_event_loop()
     response = await shield(loop.run_in_executor(None, func, rid, data))
 
     if response.status_code != 200:
+        logging.error("Failed to update MDC status for action %s, run id %s",
+                      action, rid)
         logging.error(Errors.MDC_RESPONSE.format(response))
 
 
-async def server_runner(config, mode):
-    """ The main server loop
-
-    The main server loop handles remote requests via a ZMQ interface.
-
-    Requests are the form of ZMQ.REQuest and have the format
-
-        command, *params
-
-    where *parms is a string-encoded python list as defined by the
-    commands. The following commands are currently understood:
-
-    - correct, with parmeters rid, sase, instrument, cycle, proposal, runnr
-
-       where
-
-       :param rid: is the runid within the MDC database
-       :param sase: is the sase beamline
-       :param instrument: is the instrument
-       :param cycle: is the facility cycle
-       :param proposal: is the proposal id
-       :param runnr: is the run number in integer form, e.g. without leading
-                    "r"
-
-       This will trigger a correction process to be launched for that run in
-       the given cycle and proposal.
-
-    - dark_request, with parameters rid, sase, instrument, cycle, proposal,
-      did, operation_mode, pdu_names, karabo_das, runnr
-
-       where
-
-       :param rid: is the runid within the MDC database
-       :param sase: is the sase beamline
-       :param instrument: is the instrument
-       :param cycle: is the facility cycle
-       :param proposal: is the proposal id
-       :param did: is the detector karabo id
-       :param operation_mode: is the detector's operation mode, as defined in
-              CalCat
-       :param pdu_names: physical detector units for each modules
-       :param karabo_das: the Data Agreggators representing which detector
-              modules to calibrate
-       :param runnr: is the run number in integer form, i.e. without leading
-                    "r"
-
-    - upload-yaml, with parameters sase, instrument, cycle, proposal, yaml
-
-       where
-
-       :param sase: is the sase beamline
-       :param instrument: is the instrument
-       :param cycle: is the facility cycle
-       :param proposal: is the proposal id
-       :param yaml: is url-encoded (quotes and spaces) representation of
-                    new YAML file
-
-       This will create or replace the existing YAML configuration for the
-       proposal and cycle with the newly sent one, and then push it to the git
-       configuration repo.
-
-    """
-
-    init_config_repo(config['config-repo'])
-    job_db = await init_job_db(config)
-    mdc = await init_md_client(config)
-
-    context = zmq.asyncio.Context()
-    auth = zmq.auth.thread.ThreadAuthenticator(context)
-    if mode == "prod-auth":
-        auth.start()
-        auth.allow(config['web-service']['allowed-ips'])
+class ActionsServer:
+    def __init__(self, config, mode, job_db, mdc):
+        self.config = config
+        self.mode = mode
+        self.job_db = job_db
+        self.mdc = mdc
+
+        # Set up a ZMQ socket to listen for requests
+        self.zmq_ctx = zmq.asyncio.Context()
+        auth = zmq.auth.thread.ThreadAuthenticator(self.zmq_ctx)
+        if mode == "prod-auth":
+            auth.start()
+            auth.allow(config['web-service']['allowed-ips'])
+
+        self.socket = self.zmq_ctx.socket(zmq.REP)
+        self.socket.zap_domain = b'global'
+        self.socket.bind("{}:{}".format(config['web-service']['bind-to'],
+                                        config['web-service']['port']))
+
+    # __init__ can't be async - this is a workaround
+    @classmethod
+    async def ainit(cls, config, mode):
+        init_config_repo(config['config-repo'])
+        job_db = await init_job_db(config)
+        mdc = await init_md_client(config)
+        return cls(config, mode, job_db, mdc)
+
+    @classmethod
+    async def launch(cls, config, mode):
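+        """Create the server and run its main loop, e.g.
+        loop.run_until_complete(ActionsServer.launch(config, mode))."""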
+        server = await cls.ainit(config, mode)
+        return await server.run()
+
+    async def run(self):
+        """The main server loop
+
+        The main server loop handles remote requests via a ZMQ interface.
+
+        Requests arrive as ZMQ REQ messages and have the format
+
+            command, *params
+
+        where *params is a string-encoded Python list as defined by the
+        commands.
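+
+        An illustrative 'correct' request frame (all values are hypothetical):
+            b"['correct', '42', 'SASE1', 'SPB', '202030', 'p900123', 'r0100', '']"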
+        """
+        while True:
+            req = await self.socket.recv_multipart()
+            logging.debug("Raw request data: %r", req)
+            try:
+                resp = await self.handle_one_req(req)
+            except Exception as e:
+                logging.error("Unexpected error handling request", exc_info=e)
+                resp = Errors.OTHER_ERROR.format(e).encode()
 
-    socket = context.socket(zmq.REP)
-    socket.zap_domain = b'global'
-    socket.bind("{}:{}".format(config['web-service']['bind-to'],
-                               config['web-service']['port']))
+            logging.debug("Sending response: %r", resp)
+            await self.socket.send(resp)
 
-    while True:
-        response = await socket.recv_multipart()
-        if isinstance(response, list) and len(response) == 1:
+    async def handle_one_req(self, req: List[bytes]) -> bytes:
+        if len(req) == 1:
             try:  # protect against unparseable requests
-                response = eval(response[0])
+                req = eval(req[0])
             except SyntaxError as e:
                 logging.error(str(e))
-                socket.send(Errors.REQUEST_FAILED.encode())
-                continue
+                return Errors.REQUEST_FAILED.encode()
 
-        if len(response) < 2:  # catch parseable but malformed requests
-            logging.error(Errors.REQUEST_MALFORMED.format(response))
-            socket.send(Errors.REQUEST_MALFORMED.format(response).encode())
-            continue
+        if len(req) < 2:  # catch parseable but malformed requests
+            logging.error(Errors.REQUEST_MALFORMED.format(req))
+            return Errors.REQUEST_MALFORMED.format(req).encode()
 
-        # FIXME: action should be an enum
-        action, payload = response[0], response[1:]
+        action, *payload = req
 
-        if action not in ['correct', 'dark', 'dark_request', 'query-rid',
-                          'upload-yaml', 'update_conf']:
+        if action not in self.accepted_actions:
             logging.warning(Errors.UNKNOWN_ACTION.format(action))
-            socket.send(Errors.UNKNOWN_ACTION.format(action).encode())
-            continue
+            return Errors.UNKNOWN_ACTION.format(action).encode()
 
-        logging.debug('{}, {}'.format(action, payload))
+        logging.info("Handling request for action %s", action)
+        logging.debug('Running action %s, payload %r', action, payload)
 
-        if action == "query-rid":
-            rid = payload[0]
-            await query_rid(job_db, socket, rid)
-            continue
+        handler = getattr(self, 'handle_' + action.replace('-', '_'))
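+        # e.g. the 'upload-yaml' action is dispatched to self.handle_upload_yaml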
 
-        async def do_action(action, payload):  # FIXME: this needn't be nested
-            in_folder = None
-            run_mapping = {}
-            priority = None  # TODO: Investigate argument
-
-            if action == 'update_conf':
-                updated_config = None
-                try:
-                    sase, karabo_id, instrument, cycle, proposal, config_yaml, apply = payload  # noqa
-                    updated_config = json.loads(config_yaml)
-                    await change_config(socket, config['config-repo'],
-                                        updated_config, karabo_id, instrument,
-                                        cycle, proposal,
-                                        apply.upper() == "TRUE")
-                except Exception as e:
-                    e = str(e)
-                    err_msg = (f"Failure applying config for {proposal}:"
-                               f" {e}: {updated_config}")
-                    logging.error(err_msg)
-                    logging.error(f"Unexpected error: {traceback.format_exc()}")  # noqa
-                    socket.send(yaml.dump(err_msg,
-                                          default_flow_style=False).encode())
-
-            if action in ['dark', 'correct', 'dark_request']:
-                request_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
-                try:
-                    wait_runs: List[str] = []
-                    rid, sase, instrument, cycle, proposal, *payload = payload
-
-                    if action == 'correct':
-                        runnr, priority = payload
-                        runnr = runnr.strip('r')
-                        wait_runs = [runnr]
-
-                    if action == 'dark':
-                        karabo_ids, karabo_das, *runs = payload
-
-                        karabo_ids = karabo_ids.split(',')
-                        karabo_das = karabo_das.split(',')
-                        for i, run in enumerate(runs):
-                            erun = eval(run)
-                            if isinstance(erun, (list, tuple)):
-                                typ, runnr = erun
-                                if typ == "reservation":
-                                    continue
-                                runnr = runnr.strip('r')
-                                run_mapping[typ] = runnr
-                                wait_runs.append(runnr)
-                            else:
-                                run_mapping['no_mapping_{}'.format(i)] = erun
-                                wait_runs.append(erun)
-
-                    if action == 'dark_request':
-                        karabo_id, operation_mode, *payload = payload
-                        payload = eval(','.join(payload))
-                        pdus, karabo_das, wait_runs = payload
-
-                        karabo_das = [val.strip() for val in karabo_das]
-                        wait_runs = [str(val) for val in wait_runs]
-
-                    proposal = proposal.strip('p')
-                    proposal = "{:06d}".format(int(proposal))
-
-                    logging.info(f'{action} of {proposal} run {wait_runs} at '
-                                 f'{instrument} is requested. Checking files.')
-
-                    # Read calibration configuration from yaml
-                    conf_file = Path(config['config-repo']['local-path'],
-                                     cycle, f'{proposal}.yaml')
-                    if not conf_file.exists():
-                        conf_file = Path(config['config-repo']['local-path'],
-                                         "default.yaml")
-
-                    with open(conf_file, "r") as f:
-                        pconf_full = yaml.load(f.read(),
-                                               Loader=yaml.FullLoader)
-
-                    # FIXME: remove once MyMDC sends `dark` action
-                    action_ = 'dark' if action == 'dark_request' else action
-                    data_conf = pconf_full['data-mapping']
-                    if instrument in pconf_full[action_]:
-                        pconf = pconf_full[action_][instrument]
-                    else:
-                        socket.send(Errors.NOT_CONFIGURED.encode())
-                        logging.info(f'Instrument {instrument} is unknown')
-                        return
+        # Verify that the request contains the right number of parameters
+        sig = inspect.signature(handler)
+        try:
+            sig.bind(*payload)
+        except TypeError:
+            logging.error(
+                "Wrong number of arguments for action %s", action, exc_info=True
+            )
+            return Errors.REQUEST_MALFORMED.format(req).encode()
+
+        res = handler(*payload)
+        if asyncio.iscoroutine(res):
+            res = await res
+        return res
+
+    accepted_actions = {
+        'correct', 'dark', 'dark_request', 'query-rid', 'upload-yaml',
+        'update_conf',
+    }
+
+    # Handler methods for each available action ------------------------------
+
+    async def handle_correct(
+            self, rid, _sase, instrument, cycle, proposal, runnr, priority
+    ):
+        """Launch detector correction
+
+        :param rid: is the runid within the MDC database
+        :param _sase: is the sase beamline
+        :param instrument: is the instrument
+        :param cycle: is the facility cycle
+        :param proposal: is the proposal id
+        :param runnr: is the run number in integer form, i.e. without leading
+                     "r"
+
+        This will trigger a correction process to be launched for that run in
+        the given cycle and proposal.
+        """
+        request_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
+        try:
+            runnr = runnr.strip('r')
 
-                    in_folder = config[action_]['in-folder'].format(
-                        instrument=instrument, cycle=cycle, proposal=proposal)
+            proposal = self._normalise_proposal_num(proposal)
+            pconf_full = self.load_proposal_config(cycle, proposal)
 
-                    msg = Success.QUEUED.format(proposal, wait_runs)
-                    socket.send(msg.encode())
-                    logging.debug(msg)
+            data_conf = pconf_full['data-mapping']
+            if instrument in pconf_full['correct']:
+                pconf = pconf_full['correct'][instrument]
+            else:
+                logging.info(f'Instrument {instrument} is unknown')
+                return Errors.NOT_CONFIGURED.encode()
 
-                    if action in ['correct', 'dark_request']:
-                        await update_mdc_status(mdc, action, rid, msg)
+            in_folder = self.config['correct']['in-folder'].format(
+                instrument=instrument, cycle=cycle, proposal=proposal)
+            out_folder = self.config['correct']['out-folder'].format(
+                instrument=instrument, cycle=cycle, proposal=proposal,
+                run='r{:04d}'.format(int(runnr))
+            )
 
-                except Exception as e:
-                    e = str(e)
-                    msg = Errors.JOB_LAUNCH_FAILED.format(action, e)
-                    logging.error(msg)
-                    socket.send(msg.encode())
+        except Exception as e:
+            msg = Errors.JOB_LAUNCH_FAILED.format('correct', e)
+            logging.error(msg, exc_info=e)
+            asyncio.ensure_future(
+                update_mdc_status(self.mdc, 'correct', rid, msg)
+            )
+            return msg.encode()
 
-                    if action in ['correct', 'dark_request']:
-                        await update_mdc_status(mdc, action, rid, msg)
-                    return
+        queued_msg = Success.QUEUED.format(proposal, [runnr])
+        logging.debug(queued_msg)
 
-                # Check if all files for given runs are transferred
-                all_transfers = []
-
-                # FIXME: this loop should be an asyncio.gather
-                for runnr in wait_runs:
-                    rpath = "{}/r{:04d}/".format(in_folder, int(runnr))
-                    transfer_complete = await wait_on_transfer(rpath)
-                    all_transfers.append(transfer_complete)
-                    if not transfer_complete:
-                        logging.error(
-                            Errors.TRANSFER_EVAL_FAILED.format(proposal,
-                                                               runnr))
-                        if action in ['correct', 'dark_request']:
-                            await update_mdc_status(mdc, action, rid,
-                                                    MDC.MIGRATION_TIMEOUT)
-
-                if not all(all_transfers):
-                    logging.error(Errors.TRANSFER_EVAL_FAILED.format(proposal, ','.join(wait_runs)))  # noqa
+        async def _continue():
+            """Runs in the background after we reply to the 'correct' request"""
+            await update_mdc_status(self.mdc, 'correct', rid, queued_msg)
+            try:
+                transfer_complete = await wait_transfers(
+                    [runnr], in_folder, proposal
+                )
+                if not transfer_complete:
+                    # Timed out
+                    await update_mdc_status(self.mdc, 'correct', rid,
+                                            MDC.MIGRATION_TIMEOUT)
                     return
 
-            logging.debug(f"Now doing: {action}")
-            ts = datetime.now().strftime('%y%m%d_%H%M%S')
-            if action == 'dark':
+                rpath = "{}/r{:04d}/".format(in_folder, int(runnr))
+
+                # Prepare configs for all detectors in run
+                fl = glob.glob(f"{rpath}/*.h5")
+                corr_file_list = set()
+                copy_file_list = set(fl)
                 detectors = {}
-                out_folder = config[action]['out-folder'].format(
-                    instrument=instrument, cycle=cycle, proposal=proposal,
-                    runs="_".join(wait_runs))
+                for karabo_id in pconf:
+                    dconfig = data_conf[karabo_id]
+                    # check for files according to mapping in raw run dir.
+                    if any(y in x for x in fl
+                           for y in dconfig['karabo-da']):
+                        for karabo_da in dconfig['karabo-da']:
+                            tfl = glob.glob(f"{rpath}/*{karabo_da}*.h5")
+                            corr_file_list = corr_file_list.union(set(tfl))
+                        thisconf = copy.copy(dconfig)
+                        if isinstance(pconf[karabo_id], dict):
+                            thisconf.update(copy.copy(pconf[karabo_id]))
+                        thisconf["in-folder"] = in_folder
+                        thisconf["out-folder"] = out_folder
+                        thisconf["karabo-id"] = karabo_id
+                        thisconf["run"] = runnr
+                        if priority:
+                            thisconf["priority"] = str(priority)
 
-                # Run over all available detectors
-                if karabo_ids[0] == 'all':
-                    karabo_ids = list(pconf.keys())
+                        detectors[karabo_id] = thisconf
+                copy_file_list = copy_file_list.difference(corr_file_list)
+                asyncio.ensure_future(copy_untouched_files(copy_file_list,
+                                                           out_folder,
+                                                           runnr))
+            except Exception as corr_e:
+                logging.error(f"Error during correction", exc_info=corr_e)
+                await update_mdc_status(self.mdc, 'correct', rid,
+                                        Errors.REQUEST_FAILED)
+                return
+
+            if len(detectors) == 0:
+                msg = Errors.NOTHING_TO_DO.format(rpath)
+                logging.warning(msg)
+                await update_mdc_status(self.mdc, 'correct', rid, msg)
+                return
+
+            ret, _ = await self.launch_jobs(
+                [runnr], rid, detectors, 'correct', instrument, cycle, proposal,
+                request_time,
+            )
+            await update_mdc_status(self.mdc, 'correct', rid, ret)
+        # END of part to run after sending reply
+
+        asyncio.ensure_future(_continue())
+
+        return queued_msg.encode()
+
+    async def handle_dark(
+            self, rid, _sase, instrument, cycle, proposal, karabo_ids,
+            karabo_das, *runs
+    ):
+        request_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
+        try:
+            run_mapping = {}
+            wait_runs = []
+
+            karabo_ids = karabo_ids.split(',')
+            karabo_das = karabo_das.split(',')
+
+            for i, run in enumerate(runs):
+                erun = eval(run)
+                if isinstance(erun, (list, tuple)):
+                    typ, runnr = erun
+                    if typ == "reservation":
+                        continue
+                    runnr = runnr.strip('r')
+                    run_mapping[typ] = runnr
+                    wait_runs.append(runnr)
+                else:
+                    run_mapping['no_mapping_{}'.format(i)] = erun
+                    wait_runs.append(erun)
 
-                # Prepare configs for all requested detectors
-                for karabo_id in karabo_ids:
+            proposal = self._normalise_proposal_num(proposal)
+            pconf_full = self.load_proposal_config(cycle, proposal)
 
-                    # use selected karabo_das
-                    if karabo_das[0] == 'all':
-                        karabo_da = data_conf[karabo_id]["karabo-da"]
+            data_conf = pconf_full['data-mapping']
+            if instrument in pconf_full['dark']:
+                pconf = pconf_full['dark'][instrument]
+            else:
+                logging.info(f'Instrument {instrument} is unknown')
+                return Errors.NOT_CONFIGURED.encode()
 
-                    # Check if any files for given karabo-das exists
-                    if check_files(in_folder, wait_runs, karabo_da):
-                        thisconf = copy.copy(data_conf[karabo_id])
+            # Run over all available detectors
+            if karabo_ids[0] == 'all':
+                karabo_ids = list(pconf.keys())
 
-                        if (karabo_id in pconf and
-                                isinstance(pconf[karabo_id], dict)):
-                            thisconf.update(copy.copy(pconf[karabo_id]))
+            in_folder = self.config['dark']['in-folder'].format(
+                instrument=instrument, cycle=cycle, proposal=proposal)
+            out_folder = self.config['dark']['out-folder'].format(
+                instrument=instrument, cycle=cycle, proposal=proposal,
+                runs="_".join(wait_runs))
 
-                        thisconf["in-folder"] = in_folder
-                        thisconf["out-folder"] = '/'.join((out_folder,
-                                                           karabo_id.replace('-', '_')))  # noqa  FIXME Make use of pathlib
-                        thisconf["karabo-id"] = karabo_id
-                        thisconf["karabo-da"] = karabo_da
+        except Exception as e:
+            msg = Errors.JOB_LAUNCH_FAILED.format('dark', e)
+            logging.error(msg, exc_info=e)
+            return msg.encode()
+
+        async def _continue():
+            """Runs in the background after we reply to the 'dark' request"""
+            transfer_complete = await wait_transfers(
+                wait_runs, in_folder, proposal
+            )
+            if not transfer_complete:
+                return  # Timed out
+
+            detectors = {}
+
+            # Prepare configs for all requested detectors
+            for karabo_id in karabo_ids:
+
+                # use selected karabo_das
+                karabo_das_for_id = karabo_das
+                if karabo_das[0] == 'all':
+                    karabo_das_for_id = data_conf[karabo_id]["karabo-da"]
+
+                # Check if any files for given karabo-das exists
+                if check_files(in_folder, wait_runs, karabo_das_for_id):
+                    thisconf = copy.copy(data_conf[karabo_id])
+
+                    if (karabo_id in pconf and
+                            isinstance(pconf[karabo_id], dict)):
+                        thisconf.update(copy.copy(pconf[karabo_id]))
+
+                    thisconf["in-folder"] = in_folder
+                    thisconf["out-folder"] = os.path.join(
+                        out_folder, karabo_id.replace('-', '_')
+                    )
+                    thisconf["karabo-id"] = karabo_id
+                    thisconf["karabo-da"] = karabo_das_for_id
+
+                    run_config = []
+                    for typ, run in run_mapping.items():
+                        if "no_mapping" in typ:
+                            run_config.append(run)
+                        else:
+                            thisconf[typ] = run
+                    if len(run_config):
+                        thisconf["runs"] = ",".join(run_config)
+
+                    detectors[karabo_id] = thisconf
+                else:
+                    logging.warning(
+                        "File list for %s in proposal %s runs %s is empty",
+                        karabo_id, proposal, wait_runs
+                    )
+            if len(detectors) == 0:
+                logging.warning(Errors.NOTHING_TO_DO.format(wait_runs))
+                return
+
+            await self.launch_jobs(
+                wait_runs, 0, detectors, 'dark', instrument, cycle, proposal,
+                request_time,
+            )
+        # END of part to run after sending reply
+
+        asyncio.ensure_future(_continue())
+
+        msg = Success.QUEUED.format(proposal, wait_runs)
+        logging.debug(msg)
+        return msg.encode()
+
+    async def handle_dark_request(
+            self, rid, _sase, instrument, cycle, proposal, karabo_id,
+            operation_mode, *extra
+    ):
+        """Launch dark run processing
+
+        :param rid: is the runid within the MDC database
+        :param _sase: is the sase beamline
+        :param instrument: is the instrument
+        :param cycle: is the facility cycle
+        :param proposal: is the proposal id
+        :param karabo_id: is the detector karabo id
+        :param operation_mode: is the detector's operation mode, as defined in
+               CalCat
+        :param pdu_names: physical detector units for each module
+        :param karabo_das: the Data Aggregators representing which detector
+               modules to calibrate
+        :param runnr: is the run number in integer form, i.e. without leading
+                     "r"
+        """
+        request_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
+        try:
+            pdus, karabo_das, wait_runs = eval(','.join(extra))
 
-                        run_config = []
-                        for typ, run in run_mapping.items():
-                            if "no_mapping" in typ:
-                                run_config.append(run)
-                            else:
-                                thisconf[typ] = run
-                        if len(run_config):
-                            thisconf["runs"] = ",".join(run_config)
+            karabo_das = [val.strip() for val in karabo_das]
+            runs = [str(val) for val in wait_runs]
 
-                        detectors[karabo_id] = thisconf
-                    else:
-                        logging.warning("File list for {} at {} is empty"
-                                        .format(karabo_id,
-                                                "{}/*.h5".format(rpath)))
-
-                if len(detectors) == 0:
-                    logging.warning(Errors.NOTHING_TO_DO.format(rpath))
-
-            if action == 'correct':
-                try:
-                    runnr = wait_runs[0]
-                    rpath = "{}/r{:04d}/".format(in_folder, int(runnr))
-
-                    out_folder = config[action]['out-folder'].format(
-                        instrument=instrument, cycle=cycle, proposal=proposal,
-                        run='r{:04d}'.format(int(runnr)))
-
-                    # Prepare configs for all detectors in run
-                    fl = glob.glob(f"{rpath}/*.h5")
-                    corr_file_list = set()
-                    copy_file_list = set(fl)
-                    detectors = {}
-                    for karabo_id in pconf:
-                        dconfig = data_conf[karabo_id]
-                        # check for files according to mapping in raw run dir.
-                        if any(y in x for x in fl
-                               for y in dconfig['karabo-da']):
-                            for karabo_da in dconfig['karabo-da']:
-                                tfl = glob.glob(f"{rpath}/*{karabo_da}*.h5")
-                                corr_file_list = corr_file_list.union(set(tfl))
-                            thisconf = copy.copy(dconfig)
-                            if isinstance(pconf[karabo_id], dict):
-                                thisconf.update(copy.copy(pconf[karabo_id]))
-                            thisconf["in-folder"] = in_folder
-                            thisconf["out-folder"] = out_folder
-                            thisconf["karabo-id"] = karabo_id
-                            thisconf["run"] = runnr
-                            if priority:
-                                thisconf["priority"] = str(priority)
-
-                            detectors[karabo_id] = thisconf
-                    copy_file_list = copy_file_list.difference(corr_file_list)
-                    asyncio.ensure_future(copy_untouched_files(copy_file_list,
-                                                               out_folder,
-                                                               runnr))
-                    if len(detectors) == 0:
-                        logging.warning(Errors.NOTHING_TO_DO.format(rpath))
-                        await update_mdc_status(mdc, action, rid,
-                                                MDC.NOTHING_TO_DO)
-                        return
-
-                except Exception as corr_e:
-                    logging.error(f"Error during correction: {str(corr_e)}")
-                    await update_mdc_status(mdc, action, rid,
-                                            Errors.REQUEST_FAILED)
-
-            if action == 'dark_request':
-                runs = [str(r) for r in wait_runs]
-
-                # Notebooks require one or three runs, depending on the
-                # detector type and operation mode.
-                triple = any(det in karabo_id for det in
-                             ["LPD", "AGIPD", "JUNGFRAU", "JF", "JNGFR"])
-
-                if triple and len(runs) == 1:
-                    runs_dict = {'run-high': runs[0],
-                                 'run-med': '0',
-                                 'run-low': '0'}
-                elif triple and len(runs) == 3:
-                    runs_dict = {'run-high': runs[0],
-                                 'run-med': runs[1],
-                                 'run-low': runs[2]}
-                else:  # single
-                    runs_dict = {'run': runs[0]}
-
-                out_folder = config['dark']['out-folder'].format(
-                    instrument=instrument, cycle=cycle, proposal=proposal,
-                    runs='_'.join(runs))
-                out_folder = str(Path(out_folder,
-                                      karabo_id.replace('-', '_')))
-
-                # We assume that MyMDC does not allow dark request if the data
-                # is not migrated, thus skipping some validation here.
-                thisconf = copy.copy(data_conf[karabo_id])
-
-                if (karabo_id in pconf
-                   and isinstance(pconf[karabo_id], dict)):
-                    thisconf.update(copy.copy(pconf[karabo_id]))
-
-                thisconf['in-folder'] = in_folder
-                thisconf['out-folder'] = out_folder
-                thisconf['karabo-id'] = karabo_id
-                thisconf['karabo-da'] = karabo_das
-                thisconf['operation-mode'] = operation_mode
-
-                thisconf.update(runs_dict)
-
-                detectors = {karabo_id: thisconf}
-
-            if action in ['correct', 'dark', 'dark_request']:
-                # run xfel_calibrate
-                action_ = 'dark' if action == 'dark_request' else action
-                for karabo_id, dconfig in detectors.items():
-                    detector = dconfig['detector-type']
-                    del dconfig['detector-type']
-                    cmd = config[action_]['cmd'].format(
-                        detector=detector,
-                        sched_prio=str(config[action_]['sched-prio']),
-                        action=action_, instrument=instrument,
-                        cycle=cycle, proposal=proposal,
-                        runs="_".join([f"r{r}" for r in wait_runs]),
-                        time_stamp=ts,
-                        det_instance=karabo_id,
-                        request_time=request_time
-                    ).split()
-
-                    cmd = parse_config(cmd, dconfig)
-
-                    rid = rid if action in ['correct', 'dark_request'] else 0
-                    ret = await run_action(job_db, cmd, mode,
-                                           proposal, runnr, rid)
-
-                    if action == 'correct':
-                        await update_mdc_status(mdc, action, rid, ret)
-                    if action == 'dark_request':
-                        await update_mdc_status(mdc, action, rid, ret)
-                        report_idx = cmd.index('--report-to') + 1
-                        report = cmd[report_idx] + '.pdf'
-                        await update_darks_paths(mdc, rid, in_folder,
-                                                 out_folder, report)
-
-            # TODO: moving this block further up reduces the need of so
-            #       many nested ifs. Move up and return directly
-            if action == 'upload-yaml':
-                sase, instrument, cycle, proposal, this_yaml = payload
-                this_yaml = urllib.parse.unquote_plus(this_yaml)
-                await upload_config(socket, config['config-repo'], this_yaml,
-                                    instrument, cycle, proposal)
+            proposal = self._normalise_proposal_num(proposal)
 
-        try:
-            asyncio.ensure_future(
-                do_action(copy.copy(action), copy.copy(payload)))
-        except Exception as e:  # actions that fail are only error logged
-            logging.error(str(e))
+            pconf_full = self.load_proposal_config(cycle, proposal)
+
+            data_conf = pconf_full['data-mapping']
+            if instrument in pconf_full['dark']:
+                pconf = pconf_full['dark'][instrument]
+            else:
+                logging.info(f'Instrument {instrument} is unknown')
+                return Errors.NOT_CONFIGURED.encode()
 
+            in_folder = self.config['dark']['in-folder'].format(
+                instrument=instrument, cycle=cycle, proposal=proposal)
+            out_folder = self.config['dark']['out-folder'].format(
+                instrument=instrument, cycle=cycle, proposal=proposal,
+                runs='_'.join(runs))
+            out_folder = str(Path(out_folder, karabo_id.replace('-', '_')))
+
+        except Exception as e:
+            msg = Errors.JOB_LAUNCH_FAILED.format('dark_request', e)
+            logging.error(msg, exc_info=e)
+            asyncio.ensure_future(
+                update_mdc_status(self.mdc, 'dark_request', rid, msg)
+            )
+            return msg.encode()
+
+        queued_msg = Success.QUEUED.format(proposal, runs)
+        logging.debug(queued_msg)
+
+        async def _continue():
+            """Runs in the background after we reply to the 'dark_request' request"""
+            await update_mdc_status(self.mdc, 'dark_request', rid, queued_msg)
+
+            transfer_complete = await wait_transfers(
+                runs, in_folder, proposal
+            )
+            if not transfer_complete:
+                # Timed out
+                await update_mdc_status(
+                    self.mdc, 'dark_request', rid, MDC.MIGRATION_TIMEOUT
+                )
+                return
+
+            # Notebooks require one or three runs, depending on the
+            # detector type and operation mode.
+            triple = any(det in karabo_id for det in
+                         ["LPD", "AGIPD", "JUNGFRAU", "JF", "JNGFR"])
+
+            if triple and len(runs) == 1:
+                runs_dict = {'run-high': runs[0],
+                             'run-med': '0',
+                             'run-low': '0'}
+            elif triple and len(runs) == 3:
+                runs_dict = {'run-high': runs[0],
+                             'run-med': runs[1],
+                             'run-low': runs[2]}
+            else:  # single
+                runs_dict = {'run': runs[0]}
+
+            # We assume that MyMDC does not allow dark request if the data
+            # is not migrated, thus skipping some validation here.
+            thisconf = copy.copy(data_conf[karabo_id])
+
+            if (karabo_id in pconf
+                    and isinstance(pconf[karabo_id], dict)):
+                thisconf.update(copy.copy(pconf[karabo_id]))
+
+            thisconf['in-folder'] = in_folder
+            thisconf['out-folder'] = out_folder
+            thisconf['karabo-id'] = karabo_id
+            thisconf['karabo-da'] = karabo_das
+            thisconf['operation-mode'] = operation_mode
+
+            thisconf.update(runs_dict)
+
+            detectors = {karabo_id: thisconf}
+
+            ret, report_path = await self.launch_jobs(
+                runs, rid, detectors, 'dark', instrument, cycle, proposal,
+                request_time
+            )
+            await update_mdc_status(self.mdc, 'dark_request', rid, ret)
+            if report_path is None:
+                logging.warning("Failed to identify report path for dark_request")
+            else:
+                await update_darks_paths(self.mdc, rid, in_folder,
+                                         out_folder, report_path)
+        # END of part to run after sending reply
+
+        asyncio.ensure_future(_continue())
+
+        return queued_msg.encode()
+
+    def handle_query_rid(self, rid):
+        return query_rid(self.job_db, rid)
+
+    def handle_upload_yaml(self, _sase, instrument, cycle, proposal, this_yaml):
+        """Reconfigure detector calibration for the specified proposal
+
+        :param _sase: is the sase beamline
+        :param instrument: is the instrument
+        :param cycle: is the facility cycle
+        :param proposal: is the proposal id
+        :param this_yaml: is the URL-encoded (quotes and spaces) representation
+                     of the new YAML file
+
+        This will create or replace the existing YAML configuration for the
+        proposal and cycle with the newly sent one, and then push it to the git
+        configuration repo.
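+
+        Illustrative request payload (all values are hypothetical):
+            ['upload-yaml', 'SASE1', 'SPB', '202030', 'p900123', '<url-encoded YAML>']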
+        """
+        this_yaml = urllib.parse.unquote_plus(this_yaml)
+        return upload_config(
+            self.config['config-repo'], this_yaml, instrument, cycle, proposal
+        )
+
+    async def handle_update_conf(
+            self, sase, karabo_id, instrument, cycle, proposal, config_yaml,
+            apply
+    ):
+        updated_config = None
+        try:
+            updated_config = json.loads(config_yaml)
+            return change_config(self.config['config-repo'],
+                                updated_config, karabo_id, instrument,
+                                cycle, proposal,
+                                apply.upper() == "TRUE")
+        except Exception as e:
+            err_msg = (f"Failure applying config for {proposal}:"
+                       f" {e}: {updated_config}")
+            logging.error(err_msg, exc_info=e)
+            return yaml.dump(err_msg, default_flow_style=False).encode()
+
+    # Helper methods for handlers ---------------------------------------------
+
+    @staticmethod
+    def _normalise_proposal_num(p: str) -> str:
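+        """Strip an optional leading 'p' and zero-pad to six digits, e.g. 'p2222' -> '002222'."""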
+        return "{:06d}".format(int(p.strip('p')))
+
+    def load_proposal_config(self, cycle, proposal) -> Dict:
+        # Read calibration configuration from yaml
+        conf_file = Path(
+            self.config['config-repo']['local-path'], cycle, f'{proposal}.yaml'
+        )
+        if not conf_file.exists():
+            conf_file = Path(
+                self.config['config-repo']['local-path'], "default.yaml"
+            )
+
+        logging.debug("Loading config for cycle %s, proposal %s from %s",
+                      cycle, proposal, conf_file)
+
+        with open(conf_file, "r") as f:
+            return yaml.load(f.read(), Loader=yaml.FullLoader)
+
+    async def launch_jobs(
+            self, run_nrs, rid, detectors, action, instrument, cycle, proposal,
+            request_time
+    ) -> (str, Optional[str]):
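+        """Launch an xfel-calibrate job for each detector configuration.
+
+        Returns the status message from the last job submission and, if the
+        assembled command included '--report-to', the expected report path
+        (with '.pdf' appended), otherwise None.
+        """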
+        # run xfel_calibrate
+        for karabo_id, dconfig in detectors.items():
+            detector = dconfig['detector-type']
+            del dconfig['detector-type']
+            cmd = self.config[action]['cmd'].format(
+                detector=detector,
+                sched_prio=str(self.config[action]['sched-prio']),
+                action=action, instrument=instrument,
+                cycle=cycle, proposal=proposal,
+                runs="_".join([f"r{r}" for r in run_nrs]),
+                time_stamp=datetime.now().strftime('%y%m%d_%H%M%S'),
+                det_instance=karabo_id,
+                request_time=request_time
+            ).split()
+
+            cmd = parse_config(cmd, dconfig)
+
+            ret = await run_action(self.job_db, cmd, self.mode,
+                                   proposal, run_nrs[-1], rid)
+
+        # Return after the loop so that a job is launched for every detector
+        if '--report-to' in cmd[:-1]:
+            report_idx = cmd.index('--report-to') + 1
+            report = cmd[report_idx] + '.pdf'
+        else:
+            report = None
+        return ret, report
 
 parser = argparse.ArgumentParser(
     description='Start the calibration webservice')
@@ -1009,7 +1181,7 @@ parser.add_argument('--mode', type=str, default="sim", choices=['sim', 'prod'])
 parser.add_argument('--logging', type=str, default="INFO",
                     choices=['INFO', 'DEBUG', 'ERROR'])
 
-if __name__ == "__main__":
+def main():
     args = vars(parser.parse_args())
     conf_file = args["config_file"]
     with open(conf_file, "r") as f:
@@ -1022,5 +1194,9 @@ if __name__ == "__main__":
                         format=fmt)
     loop = asyncio.get_event_loop()
     loop.create_task(update_job_db(config))
-    loop.run_until_complete(server_runner(config, mode))
+    loop.run_until_complete(ActionsServer.launch(config, mode))
     loop.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/webservice/webservice.yaml b/webservice/webservice.yaml
index 59f77be0bdfe0deaf32b1b541849a014788ab2ab..bce222c10206cd20db850fd38aefa5f32a8cb830 100644
--- a/webservice/webservice.yaml
+++ b/webservice/webservice.yaml
@@ -31,7 +31,7 @@ correct:
         --slurm-scheduling {sched_prio}
         --slurm-mem 750
         --request-time {request_time}
-        --slurm-name {action}_{instrument}_{detector}_{cycle}_p{proposal}_r{runs}
+        --slurm-name {action}_{instrument}_{detector}_{cycle}_p{proposal}_{runs}
         --report-to /gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/usr/Reports/{runs}/{det_instance}_{action}_{proposal}_{runs}_{time_stamp}
         --cal-db-timeout 300000
         --cal-db-interface tcp://max-exfl016:8015#8044
@@ -45,7 +45,7 @@ dark:
         --concurrency-par karabo_da
         --slurm-scheduling {sched_prio}
         --request-time {request_time}
-        --slurm-name {action}_{instrument}_{detector}_{cycle}_p{proposal}_r{runs}
+        --slurm-name {action}_{instrument}_{detector}_{cycle}_p{proposal}_{runs}
         --report-to /gpfs/exfel/d/cal/caldb_store/xfel/reports/{instrument}/{det_instance}/{action}/{action}_{proposal}_{runs}_{time_stamp}
         --cal-db-interface tcp://max-exfl016:8015#8044
         --db-output
diff --git a/xfel_calibrate/calibrate.py b/xfel_calibrate/calibrate.py
index 6289bdd514a72cc80493577665a28803e35b90f3..c85c1e6b9092ed9d309af83308bd6f2a9a764d22 100755
--- a/xfel_calibrate/calibrate.py
+++ b/xfel_calibrate/calibrate.py
@@ -2,6 +2,7 @@
 
 import argparse
 import inspect
+import math
 import os
 import pprint
 import re
@@ -12,10 +13,12 @@ import warnings
 from datetime import datetime
 from pathlib import Path
 from subprocess import DEVNULL, check_output
+from typing import List, Union
 
 import cal_tools.tools
 import nbconvert
 import nbformat
+import numpy as np
 from jinja2 import Template
 from nbparameterise import (extract_parameters, parameter_values,
                             replace_definitions)
@@ -255,44 +258,64 @@ def get_notebook_function(nb, fname):
     return None
 
 
-def balance_sequences(in_folder, run, sequences, sequences_per_node,
-                      path_inset, max_nodes=8):
+def balance_sequences(in_folder: str, run: int, sequences: List[int],
+                      sequences_per_node: int, karabo_da: Union[list, str],
+                      max_nodes: int = 8):
+    """Return balance list of sequences to be executed on slurm nodes
+    Total list of sequences is splitted onto several nodes based on
+    sequences_per_node. If the number of the required nodes is more than
+    the max_nodes, the number of sequences_per_node is adjusted.
+
+    :param in_folder: Path to the input raw data without the run number.
+    :param run: Run number.
+    :param sequences: List of sequences; use [-1] to select all available.
+    :param sequences_per_node: Number of sequences to process per node.
+    :param karabo_da: Karabo data aggregator used as data file inset.
+    :param max_nodes: Maximum number of Maxwell nodes to use.
+    :return: Balanced list of sequences.
     """
-    Return balance list of sequences to be executed on slurm nodes
-    Total list of sequences is splitted to several nodes, with a number of
-    sequences per node defined in the input parameter. if number
-    of required nodes is more than max_nodes, the number of sequences_per_node
-    will be increased to run on all on max_nodes.
-
-    :param in_folder: Path to raw data
-    :param run: Run number
-    :param sequences: List of sequences
-    :param sequences_per_node: Number of sequences per node
-    :param path_inset: Inset of path to data file
-    :param max_nodes: Maximum number of Maxwell nodes to use
-    :return: Balanced list of list of sequences
-    """
-    import glob
-
-    import numpy as np
-    if sequences[0] == -1:
-        path = os.path.join(in_folder, f"r{run:04d}", f"*{path_inset}-S*.h5")
-        sequence_files = glob.glob(path)
-        seq_nums = set()
-        for sf in sequence_files:
-            seqnum = re.findall(r".*-S([0-9]*).h5", sf)[0]
-            seq_nums.add(int(seqnum))
-        seq_nums -= set(sequences)
-    else:
-        seq_nums = set(sequences)
-    nsplits = len(seq_nums) // sequences_per_node + 1
-    while nsplits > max_nodes:
-        sequences_per_node += 1
-        nsplits = len(seq_nums) // sequences_per_node + 1
-        print("Changed to {} sequences per node".format(sequences_per_node))
+    # TODO: some small detector notebooks have karabo_da as a list.
+    # remove this str check after unifying the expected type across
+    # correction notebooks.
+    if isinstance(karabo_da, str):
+        karabo_da = [karabo_da]
+    elif not isinstance(karabo_da, list):
+        raise ValueError("Balance sequences expects "
+                         "karabo_da as a string or list.")
+
+    in_path = Path(in_folder, f"r{run:04d}")
+
+    # TODO: remove ["-1"] after karabo_da refactor
+    if karabo_da in [["-1"], ["all"]]:
+        karabo_da = [""]
+
+    # Get all possible sequences for the selected karabo_da
+    sequence_files = []
+    for k_da in karabo_da:
+        sequence_files.extend(in_path.glob(f"*{k_da}-S*.h5"))
+
+    # Extract sequences from input files.
+    seq_nums = {int(sf.stem[-5:]) for sf in sequence_files}
+
+    # Validate selected sequences with sequences in in_folder
+    if sequences != [-1]:
+        seq_nums = sorted(seq_nums.intersection(sequences))
+        if len(seq_nums) == 0:
+            raise ValueError(f"Selected sequences {sequences} are not "
+                             f"available in {in_path}")
+
+    # Validate required nodes with max_nodes
+    nsplits = len(seq_nums) // sequences_per_node
+    if nsplits > max_nodes:
+        sequences_per_node = math.ceil(len(seq_nums)/max_nodes)
+        nsplits = max_nodes
+        print(f"Changed to {sequences_per_node} sequences per node")
         print(f"to have a maximum of {max_nodes} concurrent jobs")
-    return [l.tolist() for l in np.array_split(list(seq_nums), nsplits) if
-            l.size > 0]
+    elif nsplits == 0:
+        nsplits = 1
+
+    return [l.tolist() for l in np.array_split(list(seq_nums), nsplits)
+            if l.size > 0]
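
As a worked example of the node-balancing arithmetic above (the numbers are hypothetical): 25 sequences at 2 per node would need 12 nodes, so sequences_per_node is raised to ceil(25 / 8) = 4 and the work is split over max_nodes = 8 chunks.

    # Worked example of the balancing logic above (hypothetical numbers).
    import math
    import numpy as np

    seq_nums = list(range(25))             # 25 sequences found on disk
    sequences_per_node, max_nodes = 2, 8

    nsplits = len(seq_nums) // sequences_per_node      # 12 nodes requested
    if nsplits > max_nodes:
        sequences_per_node = math.ceil(len(seq_nums) / max_nodes)  # -> 4
        nsplits = max_nodes                                        # -> 8
    elif nsplits == 0:
        nsplits = 1

    chunks = [c.tolist() for c in np.array_split(seq_nums, nsplits)
              if c.size > 0]
    # -> 8 chunks of 3-4 sequences each, one chunk per slurm job
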
 
 
 def make_extended_parser() -> argparse.ArgumentParser:
@@ -367,7 +390,7 @@ def add_args_from_nb(nb, parser, cvar=None, no_required=False):
     :param bool no_required: If True, none of the added options are required.
     """
     parser.description = make_epilog(nb)
-    parms = extract_parameters(nb)
+    parms = extract_parameters(nb, lang='python3')
 
     for p in parms:
 
@@ -625,7 +648,7 @@ def concurrent_run(temp_path, nb, nbname, args, cparm=None, cval=None,
     suffix = flatten_list(cval)
 
     # first convert the notebook
-    parms = extract_parameters(nb)
+    parms = extract_parameters(nb, lang='python3')
 
     if has_parm(parms, "cluster_profile"):
         cluster_profile = f"{args['cluster_profile']}_{suffix}"
@@ -635,7 +658,7 @@ def concurrent_run(temp_path, nb, nbname, args, cparm=None, cval=None,
 
     params = parameter_values(parms, **args)
     params = parameter_values(params, cluster_profile=cluster_profile)
-    new_nb = replace_definitions(nb, params, execute=False)
+    new_nb = replace_definitions(nb, params, execute=False, lang='python3')
     if not show_title:
         first_markdown_cell(new_nb).source = ''
     set_figure_format(new_nb, args["vector_figs"])
@@ -794,7 +817,7 @@ def run():
     if ext_func is not None:
         extend_params(nb, ext_func)
 
-    parms = extract_parameters(nb)
+    parms = extract_parameters(nb, lang='python3')
 
     title, author, version = extract_title_author_version(nb)
 
diff --git a/xfel_calibrate/finalize.py b/xfel_calibrate/finalize.py
index f3b3ae552d161882305087bcae42f70b8f7db64f..9f3fd19d516c83e1df35921cad2b319e5a964f85 100644
--- a/xfel_calibrate/finalize.py
+++ b/xfel_calibrate/finalize.py
@@ -9,6 +9,7 @@ from shutil import copy, copytree, move, rmtree
 from subprocess import CalledProcessError, check_call, check_output
 from textwrap import dedent
 from time import sleep
+from typing import Dict, List
 
 import cal_tools.tools
 import tabulate
@@ -127,37 +128,47 @@ def prepare_plots(run_path, threshold=1000000):
                        shell=False)
 
 
-def make_timing_summary(run_path, joblist, request_time, submission_time):
+def get_job_info(jobs: List[str], fmt: List[str]) -> List[List[str]]:
+    """Returns list of job information from sacct
+
+    :param jobs: List of job names
+    :param fmt: List of fields to query for each job (passed to sacct)
+    Result ordered according to order of jobs given
+    Order of fields in inner lists follows fmt
+    """
+
+    # will use JobID to match results to jobs (duplicate field in fmt is OK)
+    fmt_query = ",".join(["JobID"] + fmt)
+    sacct_out = check_output(["sacct", "--truncate", "--parsable2", "--noheader",
+                              f"--jobs={','.join(jobs)}",
+                              f"--format={fmt_query}"])
+    lines = sacct_out.decode().split("\n")
+
+    missing_info = ["not-found"] * len(fmt)
+    job_info = {job: missing_info for job in jobs}
+    for line in lines:
+        parts = line.split("|")
+        if parts[0] in job_info:
+            job_info[parts[0]] = parts[1:]
+
+    return [job_info[job] for job in jobs]
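
The matching above keys each --parsable2 line on its leading JobID column, so per-step lines (e.g. "<id>.batch") and unknown jobs are skipped, and jobs missing from the sacct output keep the "not-found" placeholder. A small sketch on hypothetical sacct output:

    # Sketch of the matching above, on hypothetical --parsable2 output
    # (pipe-separated fields, no header because of --noheader).
    jobs = ["1001", "1002", "1003"]
    fmt = ["Start", "End", "State"]

    sacct_out = (
        "1001|2021-01-01T10:00:00|2021-01-01T10:05:00|COMPLETED\n"
        "1001.batch|2021-01-01T10:00:00|2021-01-01T10:05:00|COMPLETED\n"
        "1002|2021-01-01T10:01:00|2021-01-01T10:06:00|COMPLETED\n"
    )

    job_info = {job: ["not-found"] * len(fmt) for job in jobs}
    for line in sacct_out.split("\n"):
        parts = line.split("|")
        if parts[0] in job_info:          # match on the leading JobID column
            job_info[parts[0]] = parts[1:]

    print([job_info[job] for job in jobs])
    # job "1003" keeps its ["not-found", "not-found", "not-found"] placeholder
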
+
+
+def make_timing_summary(run_path: Path, job_times: List[List[str]],
+                        job_time_fmt: List[str], pipeline_times: Dict[str, str]):
     """
     Create an rst file with timing summary of executed notebooks
 
     :param run_path: Run path of the slurm job
-    :param joblist: List of slurm jobs
-    :param request_time: Timestamp of notebook request
-    :param submission_time: Timestamp for slurm jobs being submitted
+    :param job_times: List of job information as returned by get_job_info
+    :param job_time_fmt: List of headers to use for job_times
+    :param pipeline_times: Dictionary of pipeline step -> timestamp
     """
     print('Prepare timing summary')
-    run_path = path.abspath(run_path)
-    pars_vals = []
-    pars = 'JobID,Start,End,Elapsed,Suspended,State'
-    pars_name = pars.split(',')
-
-    for job in joblist:
-        out = check_output(['sacct', '-T', '-j', job,
-                            '--format={}'.format(pars)],
-                           shell=False)
-        lines = str(out).split('\\n')
-
-        # loop over output lines, skip first two lines with header.
-        for line in lines[2:]:
-            s = line.split()
-            if len(s) == len(pars_name):
-                pars_vals.append(s)
-                break
 
     tmpl = Template('''
                     Runtime summary
-                    ==============
+                    ===============
 
                     .. math::
                         {% for line in time_table %}
@@ -171,27 +182,24 @@ def make_timing_summary(run_path, joblist, request_time, submission_time):
 
                     ''')
 
-    time_vals = [['Time of Request', request_time],
-                 ['Job submission', submission_time],
-                 ['Report compilation',
-                  datetime.now().strftime('%Y-%m-%dT%H:%M:%S')]]
-
-    with open("{}/timing_summary.rst".format(run_path), "w+") as gfile:
-
-        if len(pars_vals) > 0:
-            job_table = tabulate.tabulate(pars_vals, tablefmt='latex',
-                                      headers=pars_name)
+    time_vals = [
+        ["Time of Request", pipeline_times["request-time"]],
+        ["Job submission", pipeline_times["submission-time"]],
+        ["Report compilation", pipeline_times["report-compilation-time"]],
+    ]
 
-            time_table = tabulate.tabulate(time_vals, tablefmt='latex',
-                                      headers=['Processing step',
-                                               'Timestamp'])
+    with (run_path / "timing_summary.rst").open("w+") as fd:
+        job_table = tabulate.tabulate(job_times, tablefmt="latex",
+                                      headers=job_time_fmt)
+        time_table = tabulate.tabulate(time_vals, tablefmt="latex",
+                                       headers=["Processing step", "Timestamp"])
 
-            gfile.write(dedent(tmpl.render(job_table=job_table.split('\n'),
-                                           time_table=time_table.split('\n'))))
+        fd.write(dedent(tmpl.render(job_table=job_table.split("\n"),
+                                    time_table=time_table.split("\n"))))
 
 
-def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
-                author: str, version: str, report_to: str):
+def make_report(run_path: Path, tmp_path: Path, out_path: Path, project: str,
+                author: str, version: str, report_to: Path):
     """
     Create calibration report (pdf file)
 
@@ -207,11 +215,9 @@ def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
     :param version: Version of the notebook
     :param report_to: report path tailed with report name
     """
-    run_path = path.abspath(run_path)
-    report_path, report_name = path.split(report_to)
 
-    if not report_path:
-        report_path = out_path
+    report_name = report_to.stem
+    report_dir = report_to.parent
 
     try:
         check_call([sys.executable, "-m", "sphinx.cmd.quickstart",
@@ -231,13 +237,14 @@ def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
                         "Generated simple index.rst instead")
 
     # quickbuild went well we need to edit the index.rst and conf.py files
-    module_path = "{}".format(path.abspath(path.dirname(__file__)))
+    module_path = Path(__file__).absolute().parent
+    conf_fn = run_path / "conf.py"
+    tmp_conf_fn = run_path / "conf.py.tmp"
 
-    conf = SourceFileLoader("conf",
-                            "{}/conf.py".format(run_path)).load_module()
+    conf = SourceFileLoader("conf", str(conf_fn)).load_module()
     l_var = [v for v in dir(conf) if not v.startswith('__')]
 
-    with open("{}/conf.py.tmp".format(run_path), "w") as mf:
+    with tmp_conf_fn.open("w") as mf:
         latex_elements = {'extraclassoptions': ',openany, oneside',
                           'preamble': r'\usepackage{longtable}',
                           'maketitle': r'\input{titlepage.tex.txt}'}
@@ -261,29 +268,29 @@ def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
 
             mf.write(tmpl.format(var, v))
 
-    remove("{}/conf.py".format(run_path))
-    move("{}/conf.py.tmp".format(run_path), "{}/conf.py".format(run_path))
+    conf_fn.unlink()
+    move(str(tmp_conf_fn), str(conf_fn))
 
-    direntries = listdir(run_path)
+    direntries = list(run_path.iterdir())
     lead_rstfiles = ['InputParameters.rst', 'timing_summary.rst']
 
     # Order rst files based on the known order(lead_rstfiles).
     # TODO: fix order somewhere else instead of munging filenames
     def sort_key(f):
-        if f in lead_rstfiles:
-            return lead_rstfiles.index(f), f
-        elif "summary" in f.lower():
-            return len(lead_rstfiles), f
-        elif "precorrection" in f.lower():
-            return len(lead_rstfiles) + 1, f
+        if f.name in lead_rstfiles:
+            return lead_rstfiles.index(f.name), f.name
+        elif "summary" in f.name.lower():
+            return len(lead_rstfiles), f.name
+        elif "precorrection" in f.name.lower():
+            return len(lead_rstfiles) + 1, f.name
         else:
-            return len(lead_rstfiles) + 2, f
+            return len(lead_rstfiles) + 2, f.name
     direntries.sort(key=sort_key)
 
     files_to_handle = []
     for entry in direntries:
-        if isfile("{}/{}".format(run_path, entry)):
-            name, ext = splitext("{}".format(entry))
+        if entry.is_file():
+            name, ext = entry.stem, entry.suffix
             if ext == ".rst" and "index" not in name:
                 files_to_handle.append(name.strip())
 
@@ -298,7 +305,7 @@ def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
                            {%- endfor %}
                         ''')
 
-    with open("{}/index.rst".format(run_path), "w+") as mf:
+    with (run_path / "index.rst").open("w+") as mf:
         mf.write(dedent(index_tmp.render(keys=files_to_handle)))
 
     # finally call the make scripts
@@ -313,29 +320,21 @@ def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
               f"can be inspected at: {run_path}")
         return
 
-    print(f"Moving report to final location: {report_path}")
-    makedirs(report_path, exist_ok=True)
-    copy(f'{run_path}/_build/latex/{report_name}.pdf', report_path)
+    print(f"Moving report to final location: {report_dir}")
+    report_dir.mkdir(parents=True, exist_ok=True)
+    copy(run_path / "_build" / "latex" / f"{report_name}.pdf", report_dir)
 
-    temp_dirs = glob(f'{tmp_path}/*/')
     # Remove folders with figures and sphinx files.
-    print(f"Removing directories [{temp_dirs}] in temp folder: {tmp_path}")
-    for dtmp in temp_dirs:
-        rmtree(f'{dtmp}/')
-
-    # Archiving files in slurm_tmp
-    out_path = Path(out_path)
-    tmp_path = Path(tmp_path)
-    metadata = cal_tools.tools.CalibrationMetadata(out_path)
-    # TODO: add runtime summary
-    metadata.save_copy(tmp_path)
+    for tmp_subdir in tmp_path.iterdir():
+        if tmp_subdir.is_dir():
+            print(f"Removing temporary subdir: {tmp_subdir}")
+            rmtree(tmp_subdir)
 
     # Moving temporary files to out-folder after successful execution
-    # This helps in keeping elements needed for re-producibility.
-    print(f"Moving temporary files to final location"
-          f": {report_path}/{os.path.basename(tmp_path)} with name: "
-          f"slurm_out_{report_name}")
-    move(tmp_path, f"{report_path}/slurm_out_{report_name}")
+    # This helps in keeping elements needed for reproducibility.
+    slurm_archive_dir = report_dir / f"slurm_out_{report_name}"
+    print(f"Moving temporary files to final location: {slurm_archive_dir}")
+    move(str(tmp_path), str(slurm_archive_dir))
 
 
 def make_titlepage(sphinx_path, project, data_path, version):
@@ -387,6 +386,8 @@ def tex_escape(text):
 def finalize(joblist, finaljob, run_path, out_path, project, calibration,
              author, version, report_to, data_path='Unknown',
              request_time='', submission_time=''):
+    run_path = Path(run_path)
+    out_path = Path(out_path)
     print("Waiting on jobs to finish: {}".format(joblist))
     while True:
         found_jobs = set()
@@ -400,9 +401,38 @@ def finalize(joblist, finaljob, run_path, out_path, project, calibration,
         sleep(10)
 
     prepare_plots(run_path)
-    make_timing_summary(run_path, joblist + [str(finaljob)], request_time,
-                        submission_time)
+
+    # Archiving files in slurm_tmp
+    joblist.append(str(finaljob))
+    metadata = cal_tools.tools.CalibrationMetadata(out_path)
+    job_time_fmt = 'JobID,Start,End,Elapsed,Suspended,State'.split(',')
+    job_time_summary = get_job_info(joblist, job_time_fmt)
+    pipeline_time_summary = {
+        "request-time": request_time,
+        "submission-time": submission_time,
+        "report-compilation-time": datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
+    }
+    make_timing_summary(run_path, job_time_summary, job_time_fmt, pipeline_time_summary)
+    metadata.update(
+        {
+            "runtime-summary": {
+                "calibration-jobs": [dict(zip(job_time_fmt, job_info))
+                                     for job_info in job_time_summary],
+                "pipeline-steps": pipeline_time_summary,
+            }
+        }
+    )
+    metadata.save()
+    metadata.save_copy(run_path)
+
     sphinx_path = combine_report(run_path, calibration)
     make_titlepage(sphinx_path, project, data_path, version)
-    make_report(sphinx_path, run_path, out_path, project, author, version,
-                report_to)
+    make_report(
+        Path(sphinx_path),
+        run_path,
+        out_path,
+        project,
+        author,
+        version,
+        Path(report_to),
+    )
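
The runtime-summary block written into the calibration metadata above ends up holding one mapping per job plus the pipeline timestamps, roughly the following shape (all values hypothetical):

    # Hypothetical shape of the block stored by finalize() above via
    # metadata.update({"runtime-summary": runtime_summary}).
    job_time_fmt = ["JobID", "Start", "End", "Elapsed", "Suspended", "State"]
    job_time_summary = [
        ["1001", "2021-01-01T10:00:00", "2021-01-01T10:05:00",
         "00:05:00", "00:00:00", "COMPLETED"],
    ]
    runtime_summary = {
        "calibration-jobs": [dict(zip(job_time_fmt, job_info))
                             for job_info in job_time_summary],
        "pipeline-steps": {
            "request-time": "2021-01-01T09:55:00",
            "submission-time": "2021-01-01T09:56:00",
            "report-compilation-time": "2021-01-01T10:10:00",
        },
    }
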
diff --git a/xfel_calibrate/notebooks.py b/xfel_calibrate/notebooks.py
index 5019736610c389a37ec238a1801cb83c62bb3a9b..c584b6657c00b97d3a34afdc6c7b94fb26ceddbf 100644
--- a/xfel_calibrate/notebooks.py
+++ b/xfel_calibrate/notebooks.py
@@ -45,7 +45,7 @@ notebooks = {
                             "cluster cores": 8},
         },
         "FF_HISTS": {
-            "notebook": 
+            "notebook":
                 "notebooks/AGIPD/AGIPD_FF_Histogramming.ipynb",
             "concurrency": {"parameter": "modules",
                             "default concurrency": list(range(16)),