Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: calibration/pycalibration
Commits on Source (100)
Showing with 771 additions and 522 deletions
......@@ -50,14 +50,6 @@ Clone from git::
git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git
cd pycalibration
Edit path to the python environment in the bin/activate.sh file. Change::
export PATH=/home/${USER}/.local/bin:$PATH
to::
export PATH=/path/to/new/virtual/environment/bin:$PATH
Install the package::
pip install -r requirements.txt
......
source /etc/profile.d/modules.sh
module load anaconda/3
module load texlive
# export path to python environment
export PATH=/home/${USER}/.local/bin:$PATH
......@@ -3,26 +3,18 @@
# set paths to use
nb_path=$1
python_path=$2
ipython_path=$3
jupyter_path=$4
ipcluster_path="${5}"
activate_path=$6
uuid=$7
notebook=$8
detector=$9
caltype=${10}
final=${11}
finalize=${12}
cluster_cores=${13}
ipcluster_profile=$3
notebook=$4
detector=$5
caltype=$6
final=$7
finalize=$8
cluster_cores=$9
echo "Running with the following parameters:"
echo "Notebook path: $nb_path"
echo "Python path: $python_path"
echo "IPython path: $ipython_path"
echo "Jupyter path: $jupyter_path"
echo "IP-Cluster path: $ipcluster_path"
echo "Environment activate: $activate_path"
echo "IP-Cluster profile: $uuid"
echo "IP-Cluster profile: $ipcluster_profile"
echo "notebook: $notebook"
echo "detector: $detector"
echo "caltype: $caltype"
......@@ -34,7 +26,9 @@ echo "job ID: $SLURM_JOB_ID"
export CAL_NOTEBOOK_NAME=$notebook
# set up the environment
source ${activate_path}
source /etc/profile.d/modules.sh
module load anaconda/3
module load texlive/2019
echo "Starting influx feeder"
./cal_influx_feeder.sh $notebook $detector $caltype > /dev/null 2>&1 &
......@@ -43,28 +37,28 @@ echo "Starting influx feeder"
export MPLBACKEND=AGG
# start an ip cluster if requested
if [ "${uuid}" != "NO_CLUSTER" ]
if [ "${ipcluster_profile}" != "NO_CLUSTER" ]
then
${ipython_path} profile create ${uuid} --parallel
${ipcluster_path} start --n=${cluster_cores} --profile=${uuid} --daemon &
${python_path} -m IPython profile create ${ipcluster_profile} --parallel
${python_path} -m ipyparallel.cluster start --n=${cluster_cores} --profile=${ipcluster_profile} --daemonize &
sleep 15
fi
echo "Running script"
${jupyter_path} nbconvert --to rst --ExecutePreprocessor.timeout=36000 --ExecutePreprocessor.allow_errors=True --TemplateExporter.exclude_input=True --execute ${nb_path}
${python_path} -m nbconvert --to rst --ExecutePreprocessor.timeout=36000 --ExecutePreprocessor.allow_errors=True --TemplateExporter.exclude_input=True --execute ${nb_path}
# stop the cluster if requested
if [ "${uuid}" != "NO_CLUSTER" ]
if [ "${ipcluster_profile}" != "NO_CLUSTER" ]
then
${ipcluster_path} stop --profile=${uuid}
profile_path=`${ipython_path} locate profile ${uuid}`
${python_path} -m ipyparallel.cluster stop --profile=${ipcluster_profile}
profile_path=$(${python_path} -m IPython locate profile ${ipcluster_profile})
echo "Removing cluster profile from: $profile_path"
rm -rf $profile_path
fi
if [ "${final}" == "FINAL" ]
then
${finalize} $SLURM_JOB_ID
${python_path} ${finalize} $SLURM_JOB_ID
fi
killall -9 cal_influx_feeder.sh || true
from pathlib import Path
from typing import Optional, Tuple
import h5py
import numpy as np
import sharedmem
......@@ -22,21 +25,65 @@ def get_num_cells(fname, loc, module):
return options[np.argmin(dists)]
def get_acq_rate(fname, loc, module):
with h5py.File(fname, "r") as f:
try:
pulses = \
np.squeeze(f[f"INSTRUMENT/{loc}/DET/{module}CH0:xtdf/image/pulseId"][:2]) # noqa
diff = pulses[1] - pulses[0]
except:
diff = 0
options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5}
return options.get(diff, None)
def get_acq_rate(fast_paths: Tuple[str, str, int],
slow_paths: Optional[Tuple[str, str]] = ('', '')
) -> Optional[float]:
"""Get the acquisition rate from said detector module.
If the data is available from the middlelayer FPGA_COMP device, then it is
retrieved from there. If not, the rate is calculated from two different
pulses time.
The first entry is deliberatly not used, as the detector just began
operating, and it might have skipped a train.
:param slow_paths: in which file and h5 path to look for slow data.
The first string is the filename with complete path,
the second string is the key `karabo_id_control`
:param fast_paths: in which module file and h5 path to look for pulses.
The first string is the filename with complete path,
the second string is the module device name `karabo_id`,
the third parameter is the module number, used to
navigate through the h5 file structure.
def get_gain_setting(fname, h5path_ctrl):
:return acq_rate: the acquisition rate.
If not found in either file, return None.
"""
Return the gain setting based on setupr and patternTypeIndex
# Attempt to look for acquisition rate in slow data
slow_data_file, karabo_id_control = slow_paths
slow_data_file = Path(slow_data_file)
if slow_data_file.is_file():
slow_data_path = f'CONTROL/{karabo_id_control}/MDL/FPGA_COMP/bunchStructure/repetitionRate/value' # noqa
with h5py.File(slow_data_file, "r") as fin:
if slow_data_path in fin:
# The acquisition rate value is stored in a 1D float array. Use the
# value at index 3, arbitrarily chosen. It is okay to lose precision
# here because the value is only used as rate metadata.
return round(fin[slow_data_path][3], 1)
# Compute acquisition rate from fast data
fast_data_file, karabo_id, module = fast_paths
fast_data_file = Path(fast_data_file)
if fast_data_file.is_file():
fast_data_path = f'INSTRUMENT/{karabo_id}/DET/{module}CH0:xtdf/image/pulseId' # noqa
with h5py.File(fast_data_file) as fin:
if fast_data_path in fin:
# pulses is of shape (NNNN, 1), of type uint8.
# Squeeze out the data, and subtract the 2nd entry from the 3rd
# to get the pulse spacing.
pulses = np.squeeze(fin[fast_data_path][1:3])
diff = pulses[1] - pulses[0]
options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5}
return options.get(diff, None)
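For orientation, a minimal usage sketch of the refactored call; the file paths are hypothetical, and the Karabo ids follow the MID correction notebook further down this diff:

``` python
# Hypothetical file paths; ids mirror the MID notebook parameters in this MR.
acq_rate = get_acq_rate(
    fast_paths=("/gpfs/.../RAW-R0449-AGIPD00-S00000.h5", "MID_DET_AGIPD1M-1", 0),
    slow_paths=("/gpfs/.../RAW-R0449-AGIPD1MCTRL00-S00000.h5", "MID_IRU_AGIPD1M1"),
)
print(acq_rate)  # e.g. 4.5 for back-to-back pulses, or None if nothing is found
```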
def get_gain_setting(fname: str, h5path_ctrl: str) -> int:
"""
If the data is available from the middlelayer FPGA_COMP device, then it is
retrieved from there.
If not, the setting is calculated from `setupr` and `patternTypeIndex`:
gain-setting 1: setupr@dark=8, setupr@slopespc=40
gain-setting 0: setupr@dark=0, setupr@slopespc=32
......@@ -50,11 +97,18 @@ def get_gain_setting(fname, h5path_ctrl):
:param h5path_ctrl: path to control information inside the file
:return: gain setting
"""
with h5py.File(fname, "r") as f:
train_id = f["INDEX/trainId"][()]
gain_path = f'{h5path_ctrl}/gain/value'
with h5py.File(fname, "r") as fin:
if gain_path in fin:
return fin[gain_path][0]
# Get the first index at which the train ID is not zero.
train_id = fin["INDEX/trainId"][()]
idx = np.nonzero(train_id)[0][0]
setupr = f[f'{h5path_ctrl}/setupr/value'][idx]
pattern_type_idx = f[f'{h5path_ctrl}/patternTypeIndex/value'][idx]
setupr = fin[f'{h5path_ctrl}/setupr/value'][idx]
pattern_type_idx = fin[f'{h5path_ctrl}/patternTypeIndex/value'][idx]
if (setupr == 0 and pattern_type_idx < 4) or (
setupr == 32 and pattern_type_idx == 4):
return 0
......@@ -65,6 +119,30 @@ def get_gain_setting(fname, h5path_ctrl):
raise ValueError('Could not derive gain setting from setupr and patternTypeIndex') # noqa
def get_bias_voltage(fname: str, karabo_id_control: str,
module: Optional[int] = 0) -> int:
"""Read the voltage information from the FPGA device of module 0.
Different modules may operate at different voltages. In practice, they all
operate at the same voltage. As such, it is okay to read a single module's
value.
This value is read from slow data.
If the file cannot be accessed, an OSError will be raised.
If the hdf5 path cannot be accessed, None will be returned.
:param fname: path to slow data file with control information
:param karabo_id_control: The Karabo id of the control device, used to build the hdf5 path
:param module: defaults to module 0
:return: voltage, a uint16
"""
voltage_path = f'/CONTROL/{karabo_id_control}/FPGA/M_{module}/highVoltage/actual/value' # noqa
with h5py.File(fname, "r") as fin:
if voltage_path in fin:
return fin[voltage_path][0]
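A hedged sketch combining the two control-file lookups above; the path is hypothetical and the ids again mirror the notebook parameters:

``` python
control_fname = "/gpfs/.../RAW-R0449-AGIPD1MCTRL00-S00000.h5"  # hypothetical
h5path_ctrl = "/CONTROL/MID_IRU_AGIPD1M1/MDL/FPGA_COMP_TEST"

gain_setting = get_gain_setting(control_fname, h5path_ctrl)            # 0 or 1
bias_voltage = get_bias_voltage(control_fname, "MID_IRU_AGIPD1M1", 0)  # e.g. 300
```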
class AgipdCorrections:
def __init__(self, max_cells, max_pulses,
......@@ -235,24 +313,46 @@ class AgipdCorrections:
data_path = f'{agipd_base}/image'
data_dict = self.shared_dict[i_proc]
with h5py.File(file_name, 'r') as infile:
with h5py.File(ofile_name, 'w') as outfile:
image_fields = [
'trainId', 'pulseId', 'cellId', 'data', 'gain', 'mask', 'blShift',
]
compress_fields = ['gain', 'mask']
n_img = data_dict['nImg'][0]
if n_img == 0:
return
trains = data_dict['trainId'][:n_img]
n_img = data_dict['nImg'][0]
if n_img == 0:
return
trains = data_dict['trainId'][:n_img]
with h5py.File(ofile_name, 'w') as outfile:
# Copy any other data from the input file.
# This includes indexes, so it's important that the corrected data
# we write is aligned with the raw data.
with h5py.File(file_name, 'r') as infile:
self.copy_and_sanitize_non_cal_data(infile, outfile,
agipd_base,
idx_base, trains)
outfile[data_path]['data'] = data_dict['data'][:n_img]
outfile[data_path]['gain'] = data_dict['gain'][:n_img]
outfile[data_path]['blShift'] = data_dict['blShift'][:n_img]
outfile[data_path]['mask'] = data_dict['mask'][:n_img]
outfile[data_path]['cellId'] = data_dict['cellId'][:n_img]
outfile[data_path]['pulseId'] = data_dict['pulseId'][:n_img]
outfile[data_path]['trainId'] = data_dict['trainId'][:n_img]
# All corrected data goes in a /INSTRUMENT/.../image group
image_grp = outfile[data_path]
# Set up all the datasets before filling them. This puts the
# metadata about the datasets together at the start of the file,
# so it's efficient to examine the file structure.
for field in image_fields:
arr = data_dict[field][:n_img]
kw = {'fletcher32': True}
if field in compress_fields:
kw.update(compression='gzip', compression_opts=1, shuffle=True)
if arr.ndim > 1:
kw['chunks'] = (1,) + arr.shape[1:] # 1 chunk = 1 image
image_grp.create_dataset(
field, shape=arr.shape, dtype=arr.dtype, **kw
)
# Write the corrected data
for field in image_fields:
image_grp[field][:] = data_dict[field][:n_img]
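The layout chosen above can be shown standalone. This sketch (hypothetical file and field names) reproduces the same choices: per-image chunks, fletcher32 checksums on every field, and light gzip plus shuffle only for the compressible gain/mask fields:

``` python
import h5py
import numpy as np

arr = np.zeros((10, 512, 128), dtype="u4")  # stand-in for a mask-like field
with h5py.File("corr_example.h5", "w") as f:
    ds = f.create_dataset(
        "image/mask", shape=arr.shape, dtype=arr.dtype,
        chunks=(1,) + arr.shape[1:],  # 1 chunk = 1 image
        compression="gzip", compression_opts=1, shuffle=True,
        fletcher32=True,
    )
    ds[:] = arr  # fill after declaring all datasets, as write_file() does
```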
def cm_correction(self, i_proc, asic):
"""
......@@ -1000,14 +1100,14 @@ class AgipdCorrections:
for i in range(n_cores_files):
self.shared_dict.append({})
self.shared_dict[i]['cellId'] = sharedmem.empty(shape[0],
dtype='i4')
dtype='u2')
self.shared_dict[i]['pulseId'] = sharedmem.empty(shape[0],
dtype='i4')
dtype='u8')
self.shared_dict[i]['trainId'] = sharedmem.empty(shape[0],
dtype='i4')
dtype='u8')
self.shared_dict[i]['moduleIdx'] = sharedmem.empty(1, dtype='i4')
self.shared_dict[i]['nImg'] = sharedmem.empty(1, dtype='i4')
self.shared_dict[i]['mask'] = sharedmem.empty(shape, dtype='i4')
self.shared_dict[i]['mask'] = sharedmem.empty(shape, dtype='u4')
self.shared_dict[i]['data'] = sharedmem.empty(shape, dtype='f4')
self.shared_dict[i]['rawgain'] = sharedmem.empty(shape, dtype='u2')
self.shared_dict[i]['gain'] = sharedmem.empty(shape, dtype='u1')
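The dtype changes above only tighten what is already placed in shared memory. As a reminder of the mechanism, a small sketch assuming the same sharedmem package: arrays from sharedmem.empty live in anonymous shared memory, so forked workers write into the same buffer without pickling:

``` python
from multiprocessing import Pool

import sharedmem

mask = sharedmem.empty((4, 512, 128), dtype="u4")  # shared across forks

def fill(i):
    mask[i] = i  # workers write straight into the shared buffer
    return i

with Pool(4) as pool:  # fork start method, as on the Linux cluster nodes
    pool.map(fill, range(4))
print(mask[:, 0, 0])  # [0 1 2 3]
```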
......
......@@ -6,7 +6,7 @@ from matplotlib import pylab as plt
def getModulePosition(metrologyFile, moduleId):
"""Position (in mm) of a module relative to the top left
corner of it's quadrant. In case of tile-level positions,
corner of its quadrant. In case of tile-level positions,
the position refers to the center of the top left
pixel.
......@@ -22,7 +22,7 @@ def getModulePosition(metrologyFile, moduleId):
-------
ndarray:
(x, y)-Position of the module in it's quadrant
(x, y)-Position of the module in its quadrant
Raises
------
......@@ -52,7 +52,7 @@ def getModulePosition(metrologyFile, moduleId):
# ['Q1', 'Q1/M1', 'Q1/M1/T01']
h5Keys = ['/'.join(moduleList[:idx+1]) for idx in range(len(moduleList))]
# Every module of the detector gives it's position relative to
# Every module of the detector gives its position relative to
# the top left corner of its parent structure. Every position
# is stored in the positions array
positions = []
......@@ -150,7 +150,7 @@ def plotSupermoduleData(tileData, metrologyPositions, zoom=1., vmin=100., vmax=6
assert tileData.shape == (16, 32, 128)
# Conversion coefficient, required since
# matplotlib does it's business in inches
# matplotlib does its business in inches
mmToInch = 1./25.4 # inch/mm
# Some constants
......
......@@ -9,7 +9,7 @@ def show_overview(d, cell_to_preview, gain_to_preview, out_folder=None, infix=No
"""
Show an overview
:param d: A dict with the number of modules and
a dict for the constant names and it's data.
a dict for the constant names and their data.
:param cell_to_preview: Integer number of memory cells to preview.
:param gain_to_preview: Integer number for the gain stages to preview.
:param out_folder: Output folder for saving the plotted .png image.
......
......@@ -7,18 +7,20 @@ from os.path import isfile, splitext
from queue import Queue
import re
from time import sleep
from typing import Optional
from urllib.parse import urljoin
import dateutil.parser
import ipykernel
from metadata_client.metadata_client import MetadataClient
from notebook.notebookapp import list_running_servers
import numpy as np
import requests
from .mdc_config import MDC_config
from .ana_tools import save_dict_to_hdf5
from iCalibrationDB import ConstantMetaData, Versions
from .mdc_config import MDC_config
import zmq
def parse_runs(runs, return_type=str):
pruns = []
......@@ -226,7 +228,9 @@ def get_run_info(proposal, run):
return resp.json()
def get_dir_creation_date(directory, run, tsdir=False, verbosity=0):
def get_dir_creation_date(directory: str, run: int,
tsdir: Optional[bool] = False,
verbosity: Optional[int] = 0):
"""
Return run starting time from the MDC.
If that fails, return the modification time of the oldest .h5 file.
......@@ -264,14 +268,39 @@ def get_dir_creation_date(directory, run, tsdir=False, verbosity=0):
ntries -= 1
def save_const_to_h5(metadata, out_folder):
def save_const_to_h5(device, constant, condition, data,
file_loc, creation_time, out_folder):
"""
Save constant in h5 file with it's metadata
Save constant in h5 file with its metadata
(e.g. db_module, condition, creation_time)
:param metadata: Metadata
:param device: Instance of detector
:type device: iCalibrationDB.detectors object
:param constant: Calibration constant known for given detector
:type constant: iCalibrationDB.known_constants object
:param condition: Calibration condition
:type condition: iCalibrationDB.known_detector_conditions object
:param data: Constant data to save
:type data: ndarray
:param file_loc: Location of raw data "proposal:{} runs:{} {} {}"
:type file_loc: str
:param creation_time: creation_time for the saved constant
:type creation_time: datetime object
:param out_folder: path to output folder
:type out_folder: str
"""
metadata = ConstantMetaData()
metadata.calibration_constant = constant
metadata.detector_condition = condition
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(
device=device)
else:
metadata.calibration_constant_version = Versions.Timespan(
device=device,
start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
dpar = {}
for parm in metadata.detector_condition.parameters:
......@@ -289,15 +318,16 @@ def save_const_to_h5(metadata, out_folder):
data_to_store['condition'] = dpar
data_to_store['db_module'] = db_module
data_to_store['constant'] = constant_name
data_to_store['data'] = metadata.calibration_constant.data
data_to_store['data'] = data
data_to_store['creation_time'] = creation_time
data_to_store['file_loc'] = raw_data
ofile = "{}/const_{}_{}.h5".format(out_folder, constant_name, db_module)
ofile = f"{out_folder}/const_{constant_name}_{db_module}.h5"
if isfile(ofile):
print('File {} already exists and will be overwritten'.format(ofile))
print(f'File {ofile} already exists and will be overwritten')
save_dict_to_hdf5(data_to_store, ofile)
return metadata
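A hedged example of the new save_const_to_h5 signature; every value below is hypothetical, and the commented object examples follow the iCalibrationDB usage elsewhere in this diff:

``` python
import numpy as np

# device/constant/condition are iCalibrationDB objects, e.g.:
#   device    = Detectors.AGIPD1M1.Q1M1
#   constant  = Constants.AGIPD.Offset()
#   condition = Conditions.Dark.AGIPD(memory_cells=250, bias_voltage=300)
metadata = save_const_to_h5(
    device, constant, condition,
    data=np.zeros((3, 250, 512, 128), dtype="f4"),  # constant payload
    file_loc="proposal:p900137 runs:449",           # raw-data location string
    creation_time=None,                             # None -> Versions.Now
    out_folder="/gpfs/.../out_folder",              # hypothetical
)
```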
def get_random_db_interface(cal_db_interface):
"""
......@@ -337,8 +367,6 @@ def get_from_db(device, constant, condition, empty_constant,
:param doraise: (bool) if True raise errors during communication with DB.
:return: Calibration constant, metadata
"""
from iCalibrationDB import ConstantMetaData, Versions
import zmq
if version_info:
meta_only = False
......@@ -384,7 +412,6 @@ def get_from_db(device, constant, condition, empty_constant,
if ntries == 0 and doraise:
raise RuntimeError(f'{e}')
if ntries > 0:
if verbosity > 0:
if constant.name not in already_printed or verbosity > 1:
......@@ -401,26 +428,33 @@ def get_from_db(device, constant, condition, empty_constant,
def send_to_db(device, constant, condition, file_loc,
cal_db_interface, creation_time=None,
verbosity=1, timeout=30000, ntries=7, doraise=False):
timeout=30000, ntries=7, doraise=False):
"""
Send a calibration constant and its metadata to the CalDB
:param device: Instance of detector
:type device: iCalibrationDB.detectors object
:param constant: Calibration constant known for given detector
:type constant: iCalibrationDB.known_constants object
:param condition: Calibration condition
:type condition: iCalibrationDB.known_detector_conditions object
:param file_loc: Location of raw data.
:type file_loc: str
:param cal_db_interface: Interface string, e.g. "tcp://max-exfl016:8015"
:type cal_db_interface: str
:param creation_time: Latest time for constant to be created
:param verbosity: Level of verbosity (0 - silent)
:type creation_time: datetime object
:param timeout: Timeout for zmq request
:type timeout: int
:param ntries: number of tries to contact the database;
with the default of 7 and the timeout doubling from 30 s,
the cumulative wait before giving up is roughly 1 h.
:param doraise: (bool) if True raise errors during communication with DB
:type ntries: int
:param doraise: if True raise errors during communication with DB
:type doraise: bool
"""
from iCalibrationDB import ConstantMetaData, Versions
import zmq
success = False
if device:
metadata = ConstantMetaData()
metadata.calibration_constant = constant
......@@ -440,6 +474,7 @@ def send_to_db(device, constant, condition, file_loc,
this_interface = get_random_db_interface(cal_db_interface)
try:
r = metadata.send(this_interface, timeout=timeout)
success = True
break
except zmq.error.Again:
ntries -= 1
......@@ -448,8 +483,13 @@ def send_to_db(device, constant, condition, file_loc,
if ntries == 0 and doraise:
raise
except Exception as e:
if verbosity > 0:
print(e)
if "has already been taken" in str(e):
print(f"WARNING: {constant.name} has already been "
"injected with the same parameter "
"conditions\n")
else:
print(f"{e}\n")
if 'missing_token' in str(e):
ntries -= 1
else:
......@@ -457,14 +497,10 @@ def send_to_db(device, constant, condition, file_loc,
if ntries == 0 and doraise:
raise RuntimeError(f'{e}')
if ntries > 0:
if verbosity > 0:
if constant.name not in already_printed or verbosity > 1:
already_printed[constant.name] = True
begin_at = metadata.calibration_constant_version.begin_at
print("{} was sent on: {}".format(constant.name,
begin_at))
if success:
print(f"{constant.name} is injected with creation-time: "
f"{metadata.calibration_constant_version.begin_at}\n")
return metadata
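A quick check of the retry budget named in the docstring, assuming the timeout doubles on every failed attempt:

``` python
timeouts = [30 * 2 ** k for k in range(7)]  # seconds per attempt, ntries = 7
print(timeouts)       # [30, 60, 120, 240, 480, 960, 1920]
print(sum(timeouts))  # 3810 s, i.e. roughly one hour in total
```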
def get_constant_from_db(device, constant, condition, empty_constant,
cal_db_interface, creation_time=None,
......
from setuptools import setup
setup(
name='calTools',
version='0.1',
packages=['cal_tools',],
url='',
license='(c) European XFEL GmbH',
author='Steffen Hauf',
author_email='steffen.hauf@xfel.eu',
description=''
)
......@@ -20,18 +20,6 @@ python file of the form::
# Path to use for calling Python. If the environment is correctly set, simply the command
python_path = "python"
# Path to use for calling iPython. If the environment is correctly set, simply the command
ipython_path = "ipython"
# Path to use for calling Jupyter. If the environment is correctly set, simply the command
jupyter_path = "jupyter"
# Path to find the Karabo activate script at, set to an empty string if not running in a Karabo environment
karabo_activate_path = "/opt/karabo/activate"
# Path to use for calling ipcluster. If the environment is correctly set, simply the command
ipcluster_path = "/opt/karabo/extern/bin/ipcluster"
# Path to store reports in
report_path = "{}/calibration_reports/".format(os.getcwd())
......
......@@ -61,9 +61,6 @@ to install the necessary packages and setup the environment:
cd pycalibration
pip install -r requirements.txt .
5. Adjust xfel_calibrate/settings.py by changing "karabo_activate_path"
and "ipcluster_path" according to where you installed karabo.
Create your own notebook
------------------------
......
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB"
in_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/exp/MID/202030/p900137/scratch/karnem/r449_v06" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 449 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the index
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 500, 1] # range list [start, end, step] of pulse indices within a train; at most 3 list elements are allowed
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity, no baseline correction via noise is attempted
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
hg_hard_threshold = 1000 # threshold to force medium-gain offset-subtracted pixels to high gain
mg_hard_threshold = 1000 # threshold to force medium-gain offset-subtracted pixels from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask a complete ADC
cm_dark_fraction = 0.66 # fraction of empty pixels above which a module is considered dark enough for CM correction
cm_n_itr = 4 # number of iterations for common mode correction
# Correction Booleans
only_offset = False # apply only the offset correction; if False, offset is applied together with the other selected corrections, if True, offset is the only correction applied
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop-offs on ASICs are corrected
force_hg_if_below = True # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = True # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # mask an entire ADC if it is noisier than a relative threshold
common_mode = True # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = True # 5% separation in thresholding between low and medium gain
# Parallelization parameters
sequences_per_node = 2 # number of sequence files per cluster node if run as a slurm job, set to 0 to not run SLURM parallel
chunk_size = 1000 # size of chunks for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import copy
from datetime import timedelta
from dateutil import parser
import gc
import glob
import itertools
from IPython.display import HTML, display, Markdown, Latex
import math
from multiprocessing import Pool
import os
import re
import sys
import traceback
from time import time, sleep, perf_counter
import tabulate
import warnings
warnings.filterwarnings('ignore')
import yaml
from extra_geom import AGIPD_1MGeometry
from extra_data import RunDirectory, stack_detector_data
from iCalibrationDB import Detectors
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from matplotlib.colors import LogNorm
from matplotlib import cm as colormap
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("agg")
%matplotlib inline
import numpy as np
from cal_tools.agipdlib import (AgipdCorrections, get_num_cells, get_acq_rate, get_gain_setting)
from cal_tools.cython import agipdalgs as calgs
from cal_tools.ana_tools import get_range
from cal_tools.enums import BadPixels
from cal_tools.tools import get_dir_creation_date, map_modules_from_folder
from cal_tools.step_timing import StepTimer
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any other corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
```
%% Cell type:code id: tags:
``` python
if in_folder[-1] == "/":
in_folder = in_folder[:-1]
if sequences[0] == -1:
sequences = None
control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
print(f'Path to control file {control_fname}')
```
%% Cell type:code id: tags:
``` python
# Create output folder
os.makedirs(out_folder, exist_ok=overwrite)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
else:
dinstance = "AGIPD1M2"
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
def mod_name(modno):
return f"Q{modno // 4 + 1}M{modno % 4 + 1}"
print("Process modules: ", ', '.join(
[mod_name(x) for x in modules]))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Display information about the selected pulse indices for correction.
pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses
try:
if len(pulses_lst) > 1:
print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
.format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
pulses_lst[1] - pulses_lst[0]))
else:
print("one pulse is selected: a pulse of idx {}".format(pulses_lst[0]))
except Exception as e:
raise ValueError('max_pulses input Error: {}'.format(e))
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template,
karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
filename = file_list[0]
channel = int(re.findall(r".*-AGIPD([0-9]+)-.*", filename)[0])
# Evaluate number of memory cells
mem_cells = get_num_cells(filename, karabo_id, channel)
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
max_cells = mem_cells if max_cells == 0 else max_cells
# Evaluate acquisition rate
if acq_rate == 0:
acq_rate = get_acq_rate(filename, karabo_id, channel)
acq_rate = get_acq_rate((filename, karabo_id, channel))
else:
acq_rate = None
print(f"Maximum memory cells to calibrate: {max_cells}")
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour,
minutes=offset.minute, seconds=offset.second)
creation_time += delta
# Evaluate gain setting
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Set gain settion to 0")
gain_setting = 0
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells_db}\n"
f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Photon Energy: {photon_energy}\n")
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(max_cells, max_pulses,
h5_data_path=h5path,
h5_index_path=h5path_idx,
corr_bools=corr_bools)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
const_yaml = None
if os.path.isfile(f'{out_folder}/retrieved_constants.yml'):
with open(f'{out_folder}/retrieved_constants.yml', "r") as f:
const_yaml = yaml.load(f.read(), Loader=yaml.FullLoader)
# retrieve constants
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
device = getattr(getattr(Detectors, dinstance), mod_name(mod))
err = ''
try:
# check if there is a yaml file in out_folder that has the device constants.
if const_yaml and device.device_name in const_yaml:
when = agipd_corr.initialize_from_yaml(const_yaml, mod, device)
else:
when = agipd_corr.initialize_from_db(cal_db_interface, creation_time, mem_cells_db, bias_voltage,
photon_energy, gain_setting, acq_rate, mod, device, False)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, device.device_name
ts = perf_counter()
with Pool(processes=16) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = max_cells*256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
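A quick usage example for `batches`; the processing loop below consumes files `n_cores_files` at a time:

``` python
list(batches(["f0", "f1", "f2", "f3", "f4"], 2))
# -> [['f0', 'f1'], ['f2', 'f3'], ['f4']]
```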
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
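And a matching example for `imagewise_chunks`, with `chunk_size = 1000` as set in the parameters above:

``` python
# With 2500 loaded images in file slot 0, three near-equal chunks result:
list(imagewise_chunks([2500]))
# -> [(0, 0, 833), (0, 833, 1666), (0, 1666, 2500)]
```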
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
with Pool() as pool:
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files:")
for file_name in file_batch:
print(" ", file_name)
step_timer.start()
img_counts = pool.starmap(agipd_corr.read_file, enumerate(file_batch))
step_timer.done_step('Load')
# Evaluate zero-data-std mask
pool.starmap(agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
))
step_timer.done_step('Mask 0 std')
# Perform image-wise correction
pool.starmap(agipd_corr.correct_agipd, imagewise_chunks(img_counts))
step_timer.done_step("Image-wise correction")
# Perform cross-file correction parallel over asics
pool.starmap(agipd_corr.cm_correction, itertools.product(
range(len(file_batch)), range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, os.path.join(out_folder, os.path.basename(file_name).replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# If there is a yml file, a preceding notebook has already retrieved the
# constants, and the report is generated from it.
fst_print = True
to_store = []
line = []
for i, (error, modno, when, mod_dev) in enumerate(const_out):
qm = mod_name(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
if not const_yaml or mod_dev not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
line = [qm]
# If the correction crashed
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store a few timestamps if they exist
# Add NA to keep the array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
line.append(when[key])
else:
if error is not None:
line.append('Err')
else:
line.append('NA')
if len(line) > 0:
to_store.append(line)
seq = sequences[0] if sequences else 0
if len(to_store) > 0:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fyml:
yaml.safe_dump({"time-summary": {f"S{seq}":to_store}}, fyml)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(run_folder, source, include, tid=None, path='*/DET/*'):
"""
Load a single train for all modules
:param run_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
run_data = RunDirectory(run_folder, include)
if tid:
tid, data = run_data.select(path, source).train_from_id(tid)
return tid, stack_detector_data(data, source)
else:
for tid, data in run_data.select(path, source).trains(require_all=True):
return tid, stack_detector_data(data, source)
return None, None
```
%% Cell type:code id: tags:
``` python
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(f'{out_folder}/', 'image.data', include)
_, gains = get_trains_data(f'{out_folder}/', 'image.gain', include, tid)
_, mask = get_trains_data(f'{out_folder}/', 'image.mask', include, tid)
_, blshift = get_trains_data(f'{out_folder}/', 'image.blShift', include, tid)
_, cellId = get_trains_data(f'{out_folder}/', 'image.cellId', include, tid)
_, pulseId = get_trains_data(f'{out_folder}/', 'image.pulseId', include, tid)
_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, tid)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows signal vs. digitized gain.
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
display(Markdown(f'Mean over images of the RAW data\n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[:, 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {np.max(cellId[cell_id_preview])} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_id_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_id_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
display(Markdown(f'A single shot image from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 7)
ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(corrected[cell_id_preview].flatten(), bins=1000, range=(-50, 2000), log=True)
_ = plt.xlabel('[ADU]')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across one train \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(corrected.flatten(), bins=1200,
range=(-200, 20000), histtype='step', log=True, label = 'All')
_ = ax.hist(corrected[gains == 0].flatten(), bins=1200, range=(-200, 20000),
alpha=0.5, log=True, label='High gain', color='green')
_ = ax.hist(corrected[gains == 1].flatten(), bins=1200, range=(-200, 20000),
alpha=0.5, log=True, label='Medium gain', color='red')
_ = ax.hist(corrected[gains == 2].flatten(), bins=1200,
range=(-200, 20000), alpha=0.5, log=True, label='Low gain', color='yellow')
_ = ax.legend()
_ = plt.xlabel('[ADU]')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
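As a small illustration of applying the bit mask, using the `mask` array loaded above and the `BadPixels.NO_DARK_DATA` entry that the dark-related plot below also uses:

``` python
# Boolean map of entries carrying the NO_DARK_DATA flag.
no_dark = (mask.astype(np.uint32) & BadPixels.NO_DARK_DATA.value) != 0
print(f"{no_dark.mean() * 100:.2f}% of mask entries have NO_DARK_DATA set")
```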
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.mean(mask>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
%% Cell type:markdown id: tags:
# Summary of the AGIPD offline correction #
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
run = 11 # runs to process, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_Corr" # path to output to, required
modules = [-1]
```
%% Cell type:code id: tags:
``` python
import os
import dateutil.parser
import glob
import numpy as np
import re
import yaml
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
%matplotlib inline
import tabulate
from IPython.display import display, Markdown, Latex
```
%% Cell type:code id: tags:
``` python
if os.path.isfile(f'{out_folder}/retrieved_constants.yml'):
with open(f"{out_folder}/retrieved_constants.yml","r") as fyml:
main_dict = yaml.load(fyml)
else:
main_dict = {"time-summary":dict()}
if modules[0] == -1:
modules = list(range(16))
# This is needed only if the AGIPD correction notebook had no pre-correction notebook retrieving the constants.
# Gather all generated per-sequence yml files into the time summary in retrieved_constants.yml.
fnames = sorted(glob.glob(f'{out_folder}/retrieved_constants_*yml'))
for f in fnames:
with open(f,"r") as fyml:
fdict = yaml.load(fyml)
# append each sequence's time summary to the main yaml
for k, v in fdict["time-summary"].items():
main_dict["time-summary"][k] = v
os.remove(f)
with open(f"{out_folder}/retrieved_constants.yml","w") as fyml:
yaml.safe_dump(main_dict, fyml)
```
%% Cell type:code id: tags:
``` python
with open(f"{out_folder}/retrieved_constants.yml","r") as fyml:
time_summary = yaml.load(fyml)
# check if pre-notebook has retrieved constants for all modules.
const_times = []
seq = []
for k, v in sorted(time_summary["time-summary"].items()):
arr = np.array(v)
arr = arr.reshape(arr.shape[0]//len(modules), len(modules), arr.shape[1])
const_times = const_times + list(arr)
seq.append(k)
const_times = np.array(const_times)
```
%% Cell type:code id: tags:
``` python
# Function to print a summary of constant injection times.
# To reduce printouts, only unique entries are shown.
def const_table(const, pos):
"""
Create a summary table for the creation time differences for
the retrieved constants (Offset, SlopesPC, SlopesFF).
"""
print(f"{const} were injected on: ")
# catch timing differences in the retrieved constants
unique, idx, counts = np.unique(const_times[:,:,pos], return_inverse=True, return_counts=True)
idx = idx.reshape((const_times.shape[0], len(modules)))
table = []
for i in range(0, counts.shape[0]):
line = [ const_times[:,:,pos][idx==i][0] ]
mods = ''
for i_s, s in enumerate(seq):
if(const_times[i_s,:,0][idx[i_s]==i].shape[0] > 0):
mods = mods+ '{}: {}, '.format(s, const_times[i_s,:,0][idx[i_s]==i])
line.append(mods)
table.append(line)
if counts.shape[0] == 1:
table[0][1] = 'All modules'
else:
table[np.argmax(counts)][1] = 'Rest of the modules'
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Time stamps", "Modules and sequences"])))
for i_key, key in enumerate(['Offset', 'SlopesPC', 'SlopesFF']):
if const_times.shape[2] > i_key+1:
const_table(key, i_key+1)
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# AGIPD Retrieving Constants Pre-correction #
Author: K.Ahmed, Version: 1.0
Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB"
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # runs to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
calfile = "" # path to calibration file. Leave empty if all data should come from DB
nodb = False # if set only file-based constants will be used
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
max_cells_db_dark = 0 # set to a value different than 0 to use this value for dark data DB queries
max_cells_db = 0 # set to a value different than 0 to use this value for DB queries
# Correction Booleans
only_offset = False # apply only the offset correction; if False, offset is applied together with the other selected corrections, if True, offset is the only correction applied
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
```
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any other corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_hmatch"] = blc_hmatch
```
%% Cell type:code id: tags:
``` python
import sys
from collections import OrderedDict
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import h5py
import numpy as np
import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
from ipyparallel import Client
print(f"Connecting to profile {cluster_profile}")
view = Client(profile=cluster_profile)[:]
view.use_dill()
from iCalibrationDB import Constants, Conditions, Detectors
from cal_tools.tools import (map_modules_from_folder, get_dir_creation_date)
from cal_tools.agipdlib import get_gain_setting
from dateutil import parser
from datetime import timedelta
```
%% Cell type:code id: tags:
``` python
max_cells = mem_cells
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Using {creation_time} as creation time")
if sequences[0] == -1:
sequences = None
if in_folder[-1] == "/":
in_folder = in_folder[:-1]
print(f"Outputting to {out_folder}")
os.makedirs(out_folder, exist_ok=True)
import warnings
warnings.filterwarnings('ignore')
from cal_tools.agipdlib import SnowResolution
melt_snow = False if corr_bools["only_offset"] else SnowResolution.NONE
```
%% Cell type:code id: tags:
``` python
control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting: {e}\n')
print(f"Gain setting: {gain_setting}")
print(f"Detector in use is {karabo_id}")
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
print(f"Checking the files before retrieving constants")
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
```
%% Cell type:markdown id: tags:
## Retrieve Constants ##
%% Cell type:code id: tags:
``` python
from functools import partial
import yaml
def retrieve_constants(karabo_id, bias_voltage, max_cells, acq_rate,
gain_setting, photon_energy, only_dark, nodb_with_dark,
cal_db_interface, creation_time,
corr_bools, pc_bools, inp):
"""
Retrieve constants for each module in parallel and produce a dictionary
with the creation time and constant file path.
:param karabo_id: (STR) Karabo ID
:param bias_voltage: (FLOAT) Bias Voltage
:param max_cells: (INT) Memory cells
:param acq_rate: (FLOAT) Acquisition Rate
:param gain_setting: (FLOAT) Gain setting
:param photon_energy: (FLOAT) Photon Energy
:param only_dark: (BOOL) only retrieve dark constants
:param nodb_with_dark: (BOOL) no constant retrieval even for dark
:param cal_db_interface: (STR) the database interface port
:param creation_time: (STR) raw data creation time
:param corr_bools: (DICT) A dictionary with bools for applying requested corrections
:param pc_bools: (LIST) list of bools to retrieve pulse capacitor constants
:param inp: (LIST) input for the parallel cluster of the partial function
:return:
mdata_dict: (DICT) dictionary with the metadata for the retrieved constants
dev.device_name: (STR) device name
"""
import numpy as np
import sys
import traceback
from cal_tools.agipdlib import get_num_cells, get_acq_rate
from cal_tools.agipdutils import assemble_constant_dict
from cal_tools.tools import get_from_db
from iCalibrationDB import Constants, Conditions, Detectors
err = None
qm_files, qm, dev, idx = inp
# get number of memory cells from a sequence file with image data
for f in qm_files:
if max_cells == 0:
max_cells = get_num_cells(f, karabo_id, idx)
if max_cells is None:
if f != qm_files[-1]:
continue
else:
raise ValueError(f"No raw images found for {qm} for all sequences")
else:
cells = np.arange(max_cells)
# get out of the loop,
# if max_cells is successfully calculated.
break
if acq_rate == 0.:
acq_rate = get_acq_rate((f, karabo_id, idx))
else:
acq_rate = None
print(f"Set memory cells to {max_cells}")
print(f"Set acquistion rate cells to {acq_rate} MHz")
# avoid creating retireving constant, if requested.
if not nodb_with_dark:
const_dict = assemble_constant_dict(corr_bools, pc_bools, max_cells, bias_voltage,
gain_setting, acq_rate, photon_energy,
beam_energy=None, only_dark=only_dark)
# Retrieve multiple constants through an input dictionary
# to return a dict of useful metadata.
mdata_dict = dict()
for cname, cval in const_dict.items():
try:
condition = getattr(Conditions, cval[2][0]).AGIPD(**cval[2][1])
co, mdata = \
get_from_db(dev, getattr(Constants.AGIPD, cname)(),
condition, getattr(np, cval[0])(cval[1]),
cal_db_interface, creation_time, meta_only=True)
mdata_const = mdata.calibration_constant_version
# saving metadata in a dict
mdata_dict[cname] = dict()
                # check if constant was successfully retrieved.
if mdata.comm_db_success:
mdata_dict[cname]["file-path"] = f"{mdata_const.hdf5path}" \
f"{mdata_const.filename}"
mdata_dict[cname]["creation-time"] = f"{mdata_const.begin_at}"
else:
mdata_dict[cname]["file-path"] = const_dict[cname]
mdata_dict[cname]["creation-time"] = None
except Exception as e:
err = f"Error: {e}, Traceback: {traceback.format_exc()}"
print(err)
return qm, mdata_dict, dev.device_name, acq_rate, max_cells, err
pc_bools = [corr_bools.get("rel_gain"),
corr_bools.get("adjust_mg_baseline"),
corr_bools.get('blc_noise'),
corr_bools.get('blc_hmatch'),
corr_bools.get('blc_stripes'),
melt_snow]
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
else:
dinstance = "AGIPD1M2"
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
inp = []
only_dark = False
nodb_with_dark = False
if not nodb:
only_dark=(calfile != "")
if calfile != "" and not corr_bools["only_offset"]:
nodb_with_dark = nodb
# A dict to connect virtual device
# to actual device name.
for i in modules:
qm = f"Q{i//4+1}M{i%4+1}"
if qm in mapped_files and not mapped_files[qm].empty():
device = getattr(getattr(Detectors, dinstance), qm)
qm_files = [str(mapped_files[qm].get()) for _ in range(mapped_files[qm].qsize())]
else:
print(f"Skipping {qm}")
continue
inp.append((qm_files, qm, device, i))
p = partial(retrieve_constants, karabo_id, bias_voltage, max_cells,
acq_rate, gain_setting, photon_energy, only_dark, nodb_with_dark,
cal_db_interface, creation_time,
corr_bools, pc_bools)
results = view.map_sync(p, inp)
#results = list(map(p, inp))
mod_dev = dict()
mdata_dict = dict()
for r in results:
if r:
qm, md_dict, dname, acq_rate, max_cells, err = r
mod_dev[dname] = {"mod": qm, "err": err}
if err:
print(f"Error for module {qm}: {err}")
mdata_dict[dname] = md_dict
# check if it is requested not to retrieve any constants from the database
if not nodb_with_dark:
with open(f"{out_folder}/retrieved_constants.yml", "w") as outfile:
yaml.dump(mdata_dict, outfile)
print("\nRetrieved constants for modules: ",
f"{[', '.join([f'Q{x//4+1}M{x%4+1}' for x in modules])]}")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {max_cells}\n"
f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Photon Energy: {photon_energy}\n")
print(f"Constant metadata is saved in retrieved_constants.yml\n")
else:
print("No constants were retrieved as calibrated files will be used.")
```
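For orientation, the file written above maps device names to per-constant metadata. A sketch of its rough shape, with hypothetical device names and paths (the real keys come from `dev.device_name` and the calibration database):
``` python
import yaml

# Hypothetical content mirroring the mdata_dict structure built above.
example = {
    "AGIPD_MODULE_Q1M1": {                      # dev.device_name (hypothetical)
        "Offset": {
            "file-path": "/gpfs/.../cal.h5",    # hdf5path + filename from the DB
            "creation-time": "2020-01-01T00:00:00",
        },
        "SlopesFF": {
            "file-path": None,                  # constant not found in the DB
            "creation-time": None,
        },
    },
}
print(yaml.safe_dump(example))
```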
%% Cell type:code id: tags:
``` python
print("Constants are retrieved with creation time: ")
i = 0
when = dict()
to_store = []
for dname, dinfo in mod_dev.items():
print(dinfo["mod"], ":")
line = [dinfo["mod"]]
if dname in mdata_dict:
for cname, mdata in mdata_dict[dname].items():
if hasattr(mdata["creation-time"], 'strftime'):
mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
print(f'{cname:.<12s}', mdata["creation-time"])
    # Store a few timestamps, if they exist
    # Add NA to keep the array structure
for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
        if dname not in mdata_dict or dinfo["err"]:
line.append('Err')
else:
if cname in mdata_dict[dname]:
if mdata_dict[dname][cname]["creation-time"]:
line.append(mdata_dict[dname][cname]["creation-time"])
else:
line.append('NA')
else:
line.append('NA')
to_store.append(line)
i += 1
if sequences:
seq_num = sequences[0]
else:
    # sequences was set to None above because it was [-1]
seq_num = 0
with open(f"{out_folder}/retrieved_constants.yml","r") as fyml:
    time_summary = yaml.safe_load(fyml)
time_summary.update({"time-summary": {
"SAll":to_store
}})
with open(f"{out_folder}/retrieved_constants.yml","w") as fyml:
yaml.safe_dump(time_summary, fyml)
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# AGIPD Characterize Dark Images #
Author: S. Hauf, Version: 0.1
The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and gain-switching thresholds. All four types of constants are evaluated per pixel and per memory cell. Data for the detector's three gain stages needs to be present, split across separate runs.
The evaluated calibration constants are stored locally and injected in the calibration data base.
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/d/raw/SPB/202030/p900138" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPDbad_sep64" # path to output to, required
in_folder = "/gpfs/exfel/d/raw/DETLAB/202031/p900172/" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/miniHalfAGIPD" # path to output to, required
sequences = [0] # sequence files to evaluate.
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 84 # run number in which high gain data was recorded, required
run_med = 87 # run number in which medium gain data was recorded, required
run_low = 88 # run number in which low gain data was recorded, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_id = "DETLAB_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device '
karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control infromation
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # detector bias voltage
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
rawversion = 2 # RAW file format version
thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
thresholds_offset_hard = [0, 0] # For setting the same offset threshold for all 3 gains. Kept for backward compatibility. Default [0, 0] to take the following parameters.
thresholds_offset_hard_hg = [3000, 7000] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_mg = [6000, 10000] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_lg = [6000, 10000] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_noise_sigma = 5. # noise sigma thresholds for noise deduced bad pixels
thresholds_noise_hard = [0, 0] # For setting the same noise threshold for all 3 gains. Kept for backward compatibility. Default [0, 0] to take the following parameters.
thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for noise deduced bad pixels
thresholds_gain_sigma = 5. # Gain separation sigma threshold
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells
```
%% Cell type:code id: tags:
``` python
# imports and things that do not usually need to be changed
from datetime import datetime
import dateutil.parser
import warnings
warnings.filterwarnings('ignore')
from collections import OrderedDict
import os
from typing import Tuple, List
import h5py
import numpy as np
import matplotlib
import tabulate
matplotlib.use('agg')
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, Latex
%matplotlib inline
from cal_tools.tools import (get_from_db, get_dir_creation_date,
get_notebook_name, get_random_db_interface,
map_gain_stages, parse_runs,
run_prop_seq_from_path, save_const_to_h5,
send_to_db)
from cal_tools.influx import InfluxLogger
from cal_tools.enums import BadPixels
from cal_tools.plotting import (create_constant_overview,
plot_badpix_3d, show_processed_modules,
show_overview)
from cal_tools.agipdlib import get_bias_voltage, get_gain_setting
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
from ipyparallel import Client
view = Client(profile=cluster_profile)[:]
view.use_dill()
from iCalibrationDB import Constants, Conditions, Detectors, Versions
gains = np.arange(3)
IL_MODE = interlaced
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = run_high
offset_runs["med"] = run_med
offset_runs["low"] = run_low
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
loc = None
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
    nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
# TODO: Remove DETLAB
elif instrument == "HED" or instrument == "DETLAB":
dinstance = "AGIPD500K"
nmods = 8
control_names = [f'{in_folder}/r{r:04d}/RAW-R{r:04d}-{karabo_da_control}-S00000.h5'
for r in (run_high, run_med, run_low)]
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
notebook=get_notebook_name(), proposal=prop)
```
%% Cell type:code id: tags:
``` python
gain_names = ['High', 'Medium', 'Low']
runs = [run_high, run_med, run_low]
if "{" in h5path_ctrl:
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
# extract gain setting and validate that all runs have the same setting
gsettings = []
for r in runs:
control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, r, r,
karabo_da_control)
gsettings.append(get_gain_setting(control_fname, h5path_ctrl))
if not all(g == gsettings[0] for g in gsettings):
raise ValueError(f"Different gain settings for the 3 input runs {gsettings}")
gain_setting = gsettings[0]
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(f'Error: {e}')
if "component not found" in str(e):
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
```
%% Cell type:code id: tags:
``` python
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
if bias_voltage == 0:
# Read the bias voltage from files, if recorded.
# If not available, make use of the historical voltage the detector is running at
bias_voltage = get_bias_voltage(control_names[0], karabo_id_control)
bias_voltage = bias_voltage if bias_voltage is not None else 300
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}/{max_cells}")
print("Runs: {}".format([ v for v in offset_runs.values()]))
print(f"Sequences: {sequences}")
print(f"Interlaced mode: {IL_MODE}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
print(f"Gain setting: {gain_setting}")
```
%% Cell type:markdown id: tags:
The following lines create a queue of files which will then be processed module-parallel, distinguishing between the different gains.
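A minimal sketch of how such a mapping is consumed (per gain stage, `map_gain_stages` yields a dict of per-module queues; the file name below is hypothetical):
``` python
from queue import Queue

# One module queue, as produced per gain stage by map_gain_stages.
mapped_files = {"Q1M1": Queue()}
mapped_files["Q1M1"].put("RAW-R0084-AGIPD00-S00000.h5")  # hypothetical sequence file

for qm, files in mapped_files.items():
    while not files.empty():
        fname = files.get()  # one work item per (module, sequence file)
        print(qm, fname)
```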
%% Cell type:code id: tags:
``` python
# set everything up filewise
os.makedirs(out_folder, exist_ok=True)
gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
gain_mapped_files, total_sequences, total_file_size = gmf
print(f"Will process a total of {total_sequences} files.")
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per pixel and per memory cell. Offsets are the per-pixel median of the dark data taken at a given gain, noise is the standard deviation, and the gain-bit values are the medians of the gain array.
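A self-contained sketch of these per-cell statistics on synthetic dark data (shapes follow the notebook convention of `(x, y, frames)` images plus a cell ID per frame):
``` python
import numpy as np

rng = np.random.default_rng(0)
im = rng.normal(5000, 10, size=(128, 512, 300)).astype(np.float32)  # synthetic dark frames
cell_ids = np.tile(np.arange(30), 10)  # 30 memory cells recorded over 10 trains

offset = np.zeros((128, 512, 30), dtype=np.float32)
noise = np.zeros_like(offset)
for cc in np.unique(cell_ids):
    idx = cell_ids == cc
    offset[..., cc] = np.median(im[..., idx], axis=2)  # offset: per-pixel median
    noise[..., cc] = np.std(im[..., idx], axis=2)      # noise: per-pixel std
```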
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def characterize_module(il_mode: bool,
cells: int,
bp_thresh: Tuple[List[int], float, List[int], float],
rawversion: int,
loc: str,
acq_rate: float,
h5path: str,
h5path_idx: str,
control_names: List[str],
karabo_id_control: str,
                        inp: Tuple[str, int, int]) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, np.ndarray, int, float]:
import numpy as np
import copy
import h5py
from cal_tools.enums import BadPixels
from cal_tools.agipdlib import get_num_cells, get_acq_rate
fast_data_filename, channel, gg = inp
if cells == 0:
cells = get_num_cells(fast_data_filename, loc, channel)
print(f"Using {cells} memory cells")
if acq_rate == 0.:
slow_paths = control_names[gg], karabo_id_control
fast_paths = fast_data_filename, loc, channel
acq_rate = get_acq_rate(fast_paths, slow_paths)
thresholds_offset, thresholds_offset_sigma, thresholds_noise, thresholds_noise_sigma = bp_thresh
thresholds_offset_hard = thresholds_offset[gg]
thresholds_noise_hard = thresholds_noise[gg]
h5path = h5path.format(channel)
h5path_idx = h5path_idx.format(channel)
with h5py.File(fast_data_filename, "r", driver="core") as infile:
if rawversion == 2:
count = np.squeeze(infile[f"{h5path_idx}/count"])
first = np.squeeze(infile[f"{h5path_idx}/first"])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile[f"{h5path_idx}/status"])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile[f"{h5path_idx}/last"])
first = np.squeeze(infile[f"{h5path_idx}/first"])
last_index = int(last[status != 0][-1]) + 1
first_index = int(first[status != 0][0])
im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
cellIds = cellIds[::2]
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
mcells = cells #max(cells, np.max(cellIds)+1)
offset = np.zeros((im.shape[0], im.shape[1], mcells))
gains = np.zeros((im.shape[0], im.shape[1], mcells))
noise = np.zeros((im.shape[0], im.shape[1], mcells))
gains_std = np.zeros((im.shape[0], im.shape[1], mcells))
for cc in np.unique(cellIds[cellIds < mcells]):
cellidx = cellIds == cc
offset[...,cc] = np.median(im[..., cellidx], axis=2)
noise[...,cc] = np.std(im[..., cellidx], axis=2)
gains[...,cc] = np.median(ga[..., cellidx], axis=2)
gains_std[...,cc] = np.std(ga[..., cellidx], axis=2)
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (
offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
return offset, noise, gains, gains_std, gg, bp, cells, acq_rate
offset_g = OrderedDict()
noise_g = OrderedDict()
gain_g = OrderedDict()
gainstd_g = OrderedDict()
badpix_g = OrderedDict()
gg = 0
start = datetime.now()
all_cells = []
all_acq_rate = []
if thresholds_offset_hard == [0, 0]:
thresholds_offset_hard = [thresholds_offset_hard_hg, thresholds_offset_hard_mg, thresholds_offset_hard_lg]
else:
thresholds_offset_hard = [thresholds_offset_hard] * 3
if thresholds_noise_hard == [0, 0]:
thresholds_noise_hard = [thresholds_noise_hard_hg, thresholds_noise_hard_mg, thresholds_noise_hard_lg]
else:
thresholds_noise_hard = [thresholds_noise_hard] * 3
inp = []
for gain, mapped_files in gain_mapped_files.items():
dones = []
for i in modules:
qm = f"Q{i//4+1}M{i%4+1}"
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = mapped_files[qm].get()
print("Process file: ", fname_in)
dones.append(mapped_files[qm].empty())
else:
continue
inp.append((fname_in, i, gg))
gg += 1
p = partial(characterize_module, IL_MODE, max_cells,
(thresholds_offset_hard, thresholds_offset_sigma,
thresholds_noise_hard, thresholds_noise_sigma),
rawversion, karabo_id, acq_rate, h5path, h5path_idx,
control_names, karabo_id_control)
# Don't remove. Used for Debugging.
#results = list(map(p, inp))
results = view.map_sync(p, inp)
for ii, r in enumerate(results):
offset, noise, gains, gains_std, gg, bp, thiscell, thisacq = r
all_cells.append(thiscell)
all_acq_rate.append(thisacq)
for i in modules:
qm = f"Q{i//4+1}M{i%4+1}"
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[qm] = np.zeros_like(offset_g[qm])
gain_g[qm] = np.zeros_like(offset_g[qm])
gainstd_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
offset_g[qm][...,gg] = offset
noise_g[qm][...,gg] = noise
gain_g[qm][...,gg] = gains
gainstd_g[qm][..., gg] = gains_std
badpix_g[qm][...,gg] = bp
duration = (datetime.now() - start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration,
total_sequences=total_sequences,
filesize=total_file_size)
logger.send()
max_cells = np.max(all_cells)
print(f"Using {max_cells} memory cells")
acq_rate = np.max(all_acq_rate)
print(f"Using {acq_rate} MHz acquisition rate")
```
%% Cell type:code id: tags:
``` python
# Add a badpixel due to bad gain separation
for g in range(2):
    # Bad pixels due to bad gain separation.
    # Pixels with a separation significance below "thresholds_gain_sigma" are flagged.
    bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
    badpix_g[qm][..., g+1][bad_sep < thresholds_gain_sigma] |= BadPixels.GAIN_THRESHOLDING_ERROR.value
```
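A worked example of the separation criterion, with assumed per-pixel gain-bit statistics:
``` python
import numpy as np

# Assumed gain-bit medians and standard deviations for one pixel/cell:
hg_med, hg_std = 5500.0, 50.0   # high gain
mg_med, mg_std = 7000.0, 60.0   # medium gain

# Separation significance in units of the combined spread.
separation = (mg_med - hg_med) / np.sqrt(mg_std**2 + hg_std**2)
print(f"{separation:.1f} sigma")  # ~19.2: above thresholds_gain_sigma = 5, so not flagged
```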
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
%% Cell type:code id: tags:
``` python
thresholds_g = {}
for qm in gain_g.keys():
thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
for i in range(3):
thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
res[qm] = {'Offset': offset_g[qm],
'Noise': noise_g[qm],
'ThresholdsDark': thresholds_g[qm],
'BadPixelsDark': badpix_g[qm]
}
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
```
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise", "ThresholdsDark", "BadPixelsDark"]
old_const = {}
old_mdata = {}
detinst = getattr(Detectors, dinstance)
print('Retrieve pre-existing constants for comparison.')
for qm in res:
for const in res[qm]:
# Setting conditions
condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting)
device = getattr(detinst, qm)
data, mdata = get_from_db(device,
getattr(Constants.AGIPD, const)(),
condition,
None,
cal_db_interface, creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[const] = data
if mdata is not None and data is not None:
time = mdata.calibration_constant_version.begin_at
old_mdata[const] = time.isoformat()
os.makedirs('{}/old/'.format(out_folder), exist_ok=True)
save_const_to_h5(device,
getattr(Constants.AGIPD, const)(),
condition, data, file_loc, creation_time,
f'{out_folder}/old/')
else:
old_mdata[const] = "Not found"
```
%% Cell type:code id: tags:
``` python
md = None
for qm in res:
for const in res[qm]:
dconst = getattr(Constants.AGIPD, const)()
dconst.data = res[qm][const]
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting)
detinst = getattr(Detectors, dinstance)
device = getattr(detinst, qm)
if db_output:
md = send_to_db(device, dconst, condition, file_loc,
cal_db_interface, creation_time=creation_time, timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(device, dconst, condition, dconst.data, file_loc, creation_time, out_folder)
print(f"Calibration constant {const} is stored locally.\n")
print("Generated constants with conditions:\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• creation_time: {creation_time}\n")
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
# TODO: add show_processed_modules diagram for Mini-Half AGIPD
if dinstance != "AGIPD500K":
mnames=[]
for i in modules:
qm = f"Q{i//4+1}M{i % 4+1}"
mnames.append(qm)
        display(Markdown(f'## Position of the module {qm} and its ASICs ##'))
show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identifying potential effects on all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check on expected behaviour, e.g. structuring at the ASIC level should be visible in the offsets, while no immediate artifacts should be present otherwise.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res, cell, gain, infix="{}-{}-{}".format(*offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.GAIN_THRESHOLDING_ERROR.value: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value |
BadPixels.GAIN_THRESHOLDING_ERROR.value: ('MIXED', '#BFDF009F')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This suppresses isolated single bad pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
gnames = ['High Gain', 'Medium Gain', 'Low Gain']
for gain in range(3):
display(Markdown(f'### {gnames[gain]} ###'))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 8000,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
# Plot only three gain threshold maps.
bp_thresh = OrderedDict()
for mod, con in badpix_g.items():
bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
bp_thresh[mod][...,:2] = con[...,:2]
bp_thresh[mod][...,2:] = con
create_constant_overview(thresholds_g, "Threshold (ADU)", max_cells, 4000, 10000, 5,
badpixels=[bp_thresh, np.nan],
gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
marker=['d','d','','','']
)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(bool).astype(np.float64)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, 0, 0.10, 3)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
table = []
gain_names = ['High', 'Medium', 'Low']
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
for qm in badpix_g.keys():
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_g[qm][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit.value))
if old_const['BadPixelsDark'] is not None:
dataold = np.copy(old_const['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const['BadPixelsDark'][:, :, :, gain] & bit.value))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']
        l_threshold = ['', f'{thresholds_noise_sigma} / {thresholds_noise_hard[gain]}',
                       f'{thresholds_offset_sigma} / {thresholds_offset_hard[gain]}',
                       '', f'{thresholds_gain_sigma}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]
if old_const['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels ###
One pixel can be bad for different reasons, therefore, the sum of all types of bad pixels can be more than the number of all bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
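Why the per-type counts above can exceed the total: the bad-pixel map is a bitmask, so one pixel can carry several flags. A small sketch with hypothetical bit values (the notebook uses the `BadPixels` enum):
``` python
import numpy as np

NOISE = 1 << 0   # hypothetical bit values
OFFSET = 1 << 1

mask = np.array([0, NOISE, OFFSET, NOISE | OFFSET], dtype=np.uint32)
total_bad = np.count_nonzero(mask)                                    # 3 bad pixels
per_type = [np.count_nonzero(mask & bit) for bit in (NOISE, OFFSET)]  # [2, 2]
print(total_bad, per_type)  # per-type sum (4) exceeds the total (3)
```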
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise', 'ThresholdsDark']:
if const != 'ThresholdsDark':
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
else:
table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
for qm in res.keys():
data = np.copy(res[qm][const])
if const == 'ThresholdsDark':
data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data[res[qm]['BadPixelsDark']>0] = np.nan
if old_const[const] is not None and old_const['BadPixelsDark'] is not None:
dataold = np.copy(old_const[const])
if const == 'ThresholdsDark':
dataold[...,0][old_const['BadPixelsDark'][...,0]>0] = np.nan
dataold[...,1][old_const['BadPixelsDark'][...,1]>0] = np.nan
else:
dataold[old_const['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
for gain in range(3):
# Compare only 3 threshold gain-maps
if gain == 2 and const == 'ThresholdsDark':
continue
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[const] is not None and old_const['BadPixelsDark'] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# Gain Characterization (Flat Fields) #
The following code characterizes the gain of the AGIPD detector from flat field data, i.e. data with X-rays of defined intensity. This data should fulfil the following requirements:
* intensity should be such that single photon peaks are visible
* data for all modules should be present
* no shadowing should occur on any of the modules
* each pixel should have at minimum around 100 events per photon peak per memory cell
* if central beam edges are visible, they should not be significantly more intense
Characterization is done by a weighted average algorithm, which evaluates the peak locations for all pixels
and memory cells of a given module. These locations are then fitted to a linear function of the average peak
location in each module, such that it yields a relative gain correction.
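The pyDetLib implementation uses a weighted average; below is a minimal sketch of just the linear-fit idea on synthetic peak positions:
``` python
import numpy as np

avg_peaks = np.array([0.0, 55.0, 110.0, 165.0])  # module-average peak locations (ADU)
pix_peaks = 1.02 * avg_peaks + 0.5               # one pixel with ~2% higher gain

slope, intercept = np.polyfit(avg_peaks, pix_peaks, 1)
rel_gain = 1.0 / slope  # multiplicative correction for this pixel
print(f"{rel_gain:.3f}")  # ~0.980
```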
%% Cell type:code id: tags:
``` python
# the following lines should be adjusted depending on data
in_folder = '/gpfs/exfel/exp/MID/201931/p900091/raw/' # path to input data, required
modules = [3] # modules to work on, required, range allowed
out_folder = "/gpfs/exfel/exp/MID/201931/p900091/usr/FF/2.2" # path to output to, required
runs = [484, 485,] # runs to use, required, range allowed
sequences = [0,1,2,3]#,4,5,6,7,8] #,5,6,7,8,9,10] # sequences files to use, range allowed
cluster_profile = "noDB" # The ipcluster profile to use
local_output = True # output constants locally
db_output = False # output constants to database
bias_voltage = 300 # detector bias voltage
cal_db_interface = "tcp://max-exfl016:8026#8035" # the database interface to use
mem_cells = 0 # number of memory cells used
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
fit_hook = True # fit a hook function to medium gain slope
rawversion = 2 # RAW file format version
instrument = "MID"
photon_energy = 9.2 # the photon energy in keV
offset_store = "" # for file-based access
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
db_input = True # retrieve data from the calibration database; setting offset_store overrides this
deviation_threshold = 0.75 # peaks with an absolute location deviation from the median larger than this value are considered bad
acqrate = 0. # acquisition rate
use_dir_creation_date = True
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00
gain_setting = 0.1 # gain setting can have value 0 or 1, Default=0.1 for no (None) gain-setting
karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control information
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
```
%% Cell type:code id: tags:
``` python
# std library imports
from datetime import datetime
import dateutil
from functools import partial
import warnings
warnings.filterwarnings('ignore')
import os
import h5py
# numpy and matplot lib specific
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
%matplotlib inline
# parallel processing via ipcluster
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
from ipyparallel import Client
client = Client(profile=cluster_profile)
view = client[:]
view.use_dill()
# pyDetLib imports
import XFELDetAna.xfelpycaltools as xcal
import XFELDetAna.xfelpyanatools as xana
from XFELDetAna.util import env
env.iprofile = cluster_profile
profile = cluster_profile
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
from cal_tools.agipdlib import get_num_cells, get_acq_rate, get_gain_setting
from cal_tools.enums import BadPixels
from cal_tools.influx import InfluxLogger
from cal_tools.plotting import show_overview, plot_badpix_3d
from cal_tools.tools import gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name, get_dir_creation_date, get_random_db_interface
```
%% Cell type:code id: tags:
``` python
# usually no need to change these lines
sensor_size = [128, 512]
block_size = [128, 512]
QUADRANTS = 4
MODULES_PER_QUAD = 4
DET_FILE_INSET = "AGIPD"
# Define constant creation time.
if creation_time:
try:
creation_time = datetime.strptime(creation_time, '%Y-%m-%d %H:%M:%S.%f')
except Exception as e:
print(f"creation_time value error: {e}."
"Use same format as YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00/n")
creation_time = None
print("Given creation time wont be used.")
else:
creation_time = None
if not creation_time and use_dir_creation_date:
ntries = 100
while ntries > 0:
try:
creation_time = get_dir_creation_date(in_folder, runs[0])
break
except OSError:
pass
ntries -= 1
print("Using {} as creation time".format(creation_time))
runs = parse_runs(runs)
if offset_store != "":
db_input = False
else:
db_input = True
limit_trains = 20
limit_trains_eval = None
print("Parameters are:")
print("Runs: {}".format(runs))
print("Modules: {}".format(modules))
print("Sequences: {}".format(sequences))
print("Using DB: {}".format(db_output))
if instrument == "SPB":
loc = "SPB_DET_AGIPD1M-1"
dinstance = "AGIPD1M1"
karabo_id_control = "SPB_IRU_AGIPD1M1"
else:
loc = "MID_DET_AGIPD1M-1"
dinstance = "AGIPD1M2"
karabo_id_control = "MID_EXP_AGIPD1M1"
cal_db_interface = get_random_db_interface(cal_db_interface)
# these lines can usually stay as is
fbase = "{}/{{}}/RAW-{{}}-AGIPD{{:02d}}-S{{:05d}}.h5".format(in_folder)
gains = np.arange(3)
run, prop, seq = run_prop_seq_from_path(in_folder)
logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
notebook=get_notebook_name(), proposal=prop)
# extract the memory cells and acquisition rate from
# the file of the first module, first sequence and first run
channel = 0
fname = fbase.format(runs[0], runs[0].upper(), channel, sequences[0])
if acqrate == 0.:
acqrate = get_acq_rate((fname, loc, channel))
if mem_cells == 0:
cells = get_num_cells(fname, loc, channel)
mem_cells = cells # avoid setting twice
IL_MODE = interlaced
max_cells = mem_cells if not interlaced else mem_cells*2
memory_cells = mem_cells
print("Interlaced mode: {}".format(IL_MODE))
cells = np.arange(max_cells)
print(f"Acquisition rate and memory cells set from file {fname} are "
f"{acqrate} MHz and {max_cells}, respectively")
```
%% Cell type:code id: tags:
``` python
control_fname = f'{in_folder}/{runs[0]}/RAW-{runs[0].upper()}-{karabo_da_control}-S00000.h5'
if "{" in h5path_ctrl:
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
print(f"Gain setting: {gain_setting}")
```
%% Cell type:markdown id: tags:
For the characterization, offset maps for each module are needed. They are read in either locally or from the XFEL calibration database.
%% Cell type:code id: tags:
``` python
from dateutil import parser
offset_g = {}
noise_g = {}
thresholds_g = {}
pc_g = {}
if not db_input:
print("Offset, noise and thresholds have been read in from: {}".format(offset_store))
store_file = h5py.File(offset_store, "r")
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
offset_g[qm] = np.array(store_file["{}/Offset/0/data".format(qm)])
noise_g[qm] = np.array(store_file["{}/Noise/0/data".format(qm)])
thresholds_g[qm] = np.array(store_file["{}/Threshold/0/data".format(qm)])
store_file.close()
else:
print("Offset, noise and thresholds have been read in from calibration database:")
first_ex = True
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
metadata = ConstantMetaData()
offset = Constants.AGIPD.Offset()
metadata.calibration_constant = offset
det = getattr(Detectors, dinstance)
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
offset_g[qm] = offset.data
if first_ex:
print("Offset map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
noise = Constants.AGIPD.Noise()
metadata.calibration_constant = noise
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
noise_g[qm] = noise.data
if first_ex:
print("Noise map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
thresholds = Constants.AGIPD.ThresholdsDark()
metadata.calibration_constant = thresholds
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
thresholds_g[qm] = thresholds.data
if first_ex:
print("Threshold map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
pc = Constants.AGIPD.SlopesPC()
metadata.calibration_constant = pc
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
pc_g[qm] = np.nanmedian(pc.data[0,...])/pc.data
if first_ex:
print("PC map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
first_ex = False
```
%% Cell type:markdown id: tags:
## Initial peak estimates ##
First, initial peak estimates need to be defined. This is done by inspecting histograms of (a subset of) the offset-corrected data for peak locations and peak heights.
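One possible way to automate this eyeballing, assuming `scipy` is available: locate candidate peaks in a module histogram with `scipy.signal.find_peaks` (synthetic data below):
``` python
import numpy as np
from scipy.signal import find_peaks

rng = np.random.default_rng(1)
# Synthetic offset-corrected intensities: noise peak plus one- and two-photon peaks.
sample = np.concatenate([rng.normal(mu, 5, n)
                         for mu, n in [(0, 50000), (55, 5000), (105, 1000)]])
counts, edges = np.histogram(sample, bins=4000, range=(-4000, 8000))
centers = (edges[1:] + edges[:-1]) / 2

idx, props = find_peaks(counts, height=100, distance=10)
print(centers[idx], props["peak_heights"])  # candidates for peak_estimates / peak_heights
```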
%% Cell type:code id: tags:
``` python
def hist_single_module(fbase, runs, sequences, sensor_size, memory_cells, block_size,
il_mode, limit_trains, profile, rawversion, instrument, inp):
""" This function calculates a per-pixel histogram for a single module
    Runs and sequences give the data from which to calculate the histogram
"""
channel, offset, thresholds, pc, noise = inp
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
from XFELDetAna.util import env
env.iprofile = profile
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
This is done via a continous wavelet transform (CWT), using a Ricker
wavelet.
Only high gain data is evaulated, and data larger than 50 ADU,
equivalent of roughly a 9 keV photon is ignored.
Two passes are executed: the first shift is accurate to 10 ADU and
will catch baseline shifts smaller then -2000 ADU. CWT is evaluated
for peaks of widths one, three and five time the noise.
The baseline is then shifted by the position of the summed CWT maximum.
In a second pass histogram is evaluated within a range of
+- 5*noise of the initial shift value, for peak of width noise.
Baseline shifts larger than the maximum threshold or positive shifts
larger 10 are discarded, and the original data is returned, otherwise
the shift corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
        except Exception:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
        except Exception:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large a shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
    # function needs to be inline for parallel processing
def read_fun(filename, channel):
""" A reader function used by pyDetLib
"""
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size,
offset,
nCells=memory_cells,
blockSize=block_size,
gains=[0,1,2])
offset_cor.mapper = offset_cor._view.map_sync
offset_cor.debug() # force non-parallel processing since outer function will run concurrently
hist_calc = xcal.HistogramCalculator(sensor_size,
bins=4000,
range=(-4000, 8000),
blockSize=block_size)
hist_calc.mapper = hist_calc._view.map_sync
hist_calc.debug() # force non-parallel processing since outer function will run concurrently
for run in runs:
for seq in sequences:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
# we need to do proper gain thresholding
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
hist_calc.fill(d)
h, e, c, _ = hist_calc.get()
return h, e, c
inp = []
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], pc_g[qm][0,...], noise_g[qm][...,0]))
p = partial(hist_single_module, fbase, runs, sequences,
sensor_size, memory_cells, block_size, IL_MODE, limit_trains, profile, rawversion, instrument)
res_uncorr = list(map(p, inp))
```
%% Cell type:code id: tags:
``` python
d = []
qms = []
for i, r in enumerate(res_uncorr):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
qms.append(qm)
h, e, c = r
d.append({
'x': c,
'y': h,
'drawstyle': 'steps-mid'
})
fig = xana.simplePlot(d, y_log=False,
figsize="2col",
aspect=2,
x_range=(-50, 500),
x_label="Intensity (ADU)",
y_label="Counts")
fig.savefig("{}/FF_module_{}_peak_pos.png".format(out_folder, ",".join(qms)))
```
%% Cell type:code id: tags:
``` python
# these should be quite stable
peak_estimates = [0, 55, 105, 155]
peak_heights = [5e7, 5e6, 1e6, 5e5]
peak_sigma = [5., 5.,5., 5.,]
print("Using the following peak estimates: {}".format(peak_estimates))
```
%% Cell type:markdown id: tags:
## Calculate relative gain per module ##
Using the estimates obtained above, we calculate the relative gain per module. For this we use the weighted average method implemented in pyDetLib.
%% Cell type:code id: tags:
``` python
block_size = [64, 64]
def relgain_single_module(fbase, runs, sequences, peak_estimates,
peak_heights, peak_sigma, memory_cells, sensor_size,
block_size, il_mode, profile, limit_trains_eval, rawversion, instrument, inp):
""" A function for calculated the relative gain of a single AGIPD module
"""
# import needed inline for parallel processing
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
from XFELDetAna.util import env
env.iprofile = profile
channel, offset, thresholds, noise, pc = inp
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
This is done via a continous wavelet transform (CWT), using a Ricker
wavelet.
Only high gain data is evaulated, and data larger than 50 ADU,
equivalent of roughly a 9 keV photon is ignored.
Two passes are executed: the first shift is accurate to 10 ADU and
will catch baseline shifts smaller then -2000 ADU. CWT is evaluated
for peaks of widths one, three and five time the noise.
The baseline is then shifted by the position of the summed CWT maximum.
In a second pass histogram is evaluated within a range of
+- 5*noise of the initial shift value, for peak of width noise.
Baseline shifts larger than the maximum threshold or positive shifts
larger 10 are discarded, and the original data is returned, otherwise
the shift corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
        except Exception:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
        except Exception:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large a shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
    # function needs to be inline for parallel processing
def read_fun(filename, channel):
""" A reader function used by pyDetLib
"""
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains_eval is not None:
last_index = min(limit_trains_eval*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size, offset, nCells=memory_cells,
blockSize=block_size, gains=[0,1,2])
offset_cor.mapper = offset_cor._view.map_sync
rel_gain = xcal.GainMapCalculator(sensor_size,
peak_estimates,
peak_sigma,
nCells=memory_cells,
peakHeights = peak_heights,
noiseMap=noise,
deviationType="relative")
rel_gain.mapper = rel_gain._view.map_sync
for run in runs:
for seq in sequences:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
# filter out cells without valid offset/threshold constants
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
# we need to do proper gain thresholding
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
rel_gain.fill(d, cellTable=c)
gain_map = rel_gain.get()
gain_map_func = rel_gain.getUsingFunc(inverse=False)
pks, stds = rel_gain.getRawPeaks()
return gain_map, pks, stds, gain_map_func
inp = []
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], noise_g[qm][...,0], pc_g[qm][0,...]))
start = datetime.now()
p = partial(relgain_single_module, fbase, runs, sequences,
peak_estimates, peak_heights, peak_sigma, memory_cells,
sensor_size, block_size, IL_MODE, profile, limit_trains_eval, rawversion, instrument)
res_gain = list(map(p, inp)) # don't run concurrently, as the inner functions are parallelized
duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration)
logger.send()
```
%% Cell type:markdown id: tags:
Finally, we inspect the results by plotting the number of entries, peak locations and resulting gain maps for each peak. In the course of doing so, we identify bad pixels as those having either 0 entries for a peak or `nan` values for the peak location.
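A minimal sketch of that flagging logic (illustrative only; `entries` and `peak_locations` are hypothetical stand-ins for the per-pixel arrays computed below):
``` python
import numpy as np

# hypothetical per-pixel arrays of shape (n_pixels, n_peaks)
entries = np.array([[120, 80], [0, 95], [110, 70]])
peak_locations = np.array([[30., 60.], [31., 62.], [np.nan, 59.]])

# bad if any peak has zero entries or a non-finite location
bad = (entries == 0).any(axis=-1) | (~np.isfinite(peak_locations)).any(axis=-1)
print(bad)  # [False  True  True]
```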
%% Cell type:code id: tags:
``` python
gain_m = {}
flatsff = {}
gainoff_g = {}
entries_g = {}
peaks_g = {}
mask_g = {}
cell_to_preview = 4
masks_eval_peaks = [1, 2]
global_eval_peaks = [1]
global_meds = {}
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
gain_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
gain_mdb = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
entries_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells, 5))
gainoff_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
mask_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells), np.uint32)
gainoff_g[qm] = gainoff_db
gain_m[qm] = gain_mdb
entries_g[qm] = entries_db
peaks_g[qm] = pks
# create a mask for irregular pixels
# first bit set if first peak has nan entries
for pk in masks_eval_peaks:
mask_db[~(np.isfinite(gain_mdb) & np.isfinite(gainoff_db))] |= BadPixels.FF_GAIN_EVAL_ERROR.value
mask_db[np.abs(1-gain_mdb/np.nanmedian(gain_mdb)) > deviation_threshold] |= BadPixels.FF_GAIN_DEVIATION.value
# second bit set if entries are 0 for first peak
mask_db[entries[...,1] == 0] |= BadPixels.FF_NO_ENTRIES.value
zero_oc = np.count_nonzero((entries > 0).astype(int), axis=3)
mask_db[zero_oc <= len(peak_estimates)-2] |= BadPixels.FF_NO_ENTRIES.value
# third bit set if entries of a given adc show significant noise
stds = []
for ii in range(8):
for jj in range(8):
stds.append(np.std(entries_db[ii*16:(ii+1)*16,jj*64+2:(jj+1)*64-2,:,1], axis=(0,1)))
avg_stds = np.median(np.array(stds), axis=0)
for ii in range(8):
for jj in range(8):
std = np.std(entries_db[ii*16:(ii+1)*16,jj*64+2:(jj+1)*64-2,:,1], axis=(0,1))
if np.any(std > 5*avg_stds):
mask_db[ii*16:(ii+1)*16,jj*64:(jj+1)*64,std > 5*avg_stds] |= BadPixels.FF_GAIN_DEVIATION.value
mask_g[qm] = mask_db
flat = np.zeros((gains.shape[0], gains.shape[1], memory_cells, 3))
for j in range(2,len(peak_estimates)):
flat[...,j-2] = entries[...,j]/np.mean(entries[...,j])
flat = np.mean(flat, axis=3)
#flat_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
#for j in range(2):
# flat_db[...,j::2] = flat
flatsff[qm] = flat
global_meds[qm] = np.nanmedian(pks[...,global_eval_peaks][np.max(mask_db, axis=2) != 0])
```
%% Cell type:markdown id: tags:
## Evaluated peak locations ##
The following plot shows the evaluated peak locations for each peak. Peak ids increase downward:
%% Cell type:code id: tags:
``` python
from mpl_toolkits.axes_grid1 import AxesGrid
cell_to_preview = 4
for qm, data in peaks_g.items():
print("The module shown is: {}".format(qm))
print("The cell shown is: {}".format(cell_to_preview))
print("Evaluated peaks: {}".format(data.shape[-1]))
fig = plt.figure(figsize=(15,15))
grid = AxesGrid(fig, 111,
nrows_ncols=(data.shape[-1], 1),
axes_pad=0.1,
share_all=True,
label_mode="L",
cbar_location="right",
cbar_mode="each",
cbar_size="7%",
cbar_pad="2%")
for j in range(data.shape[-1]):
d = data[...,cell_to_preview,j]
d[~np.isfinite(d)] = 0
im = grid[j].imshow(d, interpolation="nearest", vmin=0.9*np.nanmedian(d), vmax=max(1.1*np.nanmedian(d), 50))
cb = grid.cbar_axes[j].colorbar(im)
cb.set_label_text("Peak location (ADU)")
```
%% Cell type:markdown id: tags:
## Gain Slopes And Offsets ##
The gain slopes and offsets are deduced by fitting a linear function $y = mx + b$ to the evaluated peaks. Gains are normalized over all pixels and all memory cells of a module by using the average peak locations as $x$ values. Thus slopes centered around 1 are to be expected.
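A minimal sketch of this normalization (illustrative only; `pks` stands in for the per-pixel peak locations evaluated above):
``` python
import numpy as np

# hypothetical peak locations for 3 pixels and 3 peaks
pks = np.array([[29., 58., 88.],
                [31., 62., 93.],
                [30., 60., 90.]])
x = np.nanmean(pks, axis=0)  # module-average peak locations used as x values

# per-pixel linear fit y = m*x + b; slopes m cluster around 1 by construction
slopes = [np.polyfit(x, y, 1)[0] for y in pks]
print(np.round(slopes, 3))
```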
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(gains[...,0], interpolation="nearest", vmin=0.85, vmax=1.15)
cb = fig.colorbar(im)
cb.set_label("Gain slope m")
fig.savefig("{}/gain_m_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(gains[...,0].flatten(), range=(0.5, 1.5), bins=100)
ax.set_ylabel("Occurences")
ax.set_xlabel("Gain slope m")
ax.semilogy()
```
%% Cell type:markdown id: tags:
The gain offsets $b$ are expected to be centered around 0 and minimal, as the data is already offset-subtracted.
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(gains[...,1], interpolation="nearest", vmin=-25, vmax=25)
cb = fig.colorbar(im)
cb.set_label("Gain offset b")
fig.savefig("{}/gain_b_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(gains[...,1].flatten(), range=(-25, 25), bins=100)
ax.set_ylabel("Occurences")
ax.set_xlabel("Gain offset b")
ax.semilogy()
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
Bad pixels are defined as any of the following criteria:
* the gain evaluation did not converge, or the location of the noise peak deviates by more than the deviation threshold from the median location.
* the number of entries for the noise peak is 0.
* the standard deviation of the number of entries is larger than 5 times the standard deviation for the ASIC the pixel is on.
The criteria are combined as bit flags in a single mask, as sketched below.
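A minimal sketch of recovering the individual flags from a mask value, using the `BadPixels` enum already imported above:
``` python
from cal_tools.enums import BadPixels

mask_value = BadPixels.FF_GAIN_EVAL_ERROR.value | BadPixels.FF_NO_ENTRIES.value
set_flags = [bp.name for bp in BadPixels if mask_value & bp.value]
print(set_flags)  # ['FF_GAIN_EVAL_ERROR', 'FF_NO_ENTRIES']
```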
%% Cell type:code id: tags:
``` python
print("The deviation threshold is: {}".format(deviation_threshold))
```
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
mask_db = mask_g[qm]
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(np.log2(mask_db[...,cell_to_preview]), interpolation="nearest", vmin=0, vmax=32)
cb = fig.colorbar(im)
cb.set_label("Mask value")
fig.savefig("{}/mask_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(np.log2(mask_db.flatten()), range=(0, 32), bins=32, density=True)
ax.set_ylabel("Occurences")
ax.set_xlabel("Mask value")
ax.semilogy()
```
%% Cell type:code id: tags:
``` python
cols = {BadPixels.FF_GAIN_EVAL_ERROR.value: (BadPixels.FF_GAIN_EVAL_ERROR.name, '#FF000080'),
BadPixels.FF_GAIN_DEVIATION.value: (BadPixels.FF_GAIN_DEVIATION.name, '#0000FF80'),
BadPixels.FF_NO_ENTRIES.value: (BadPixels.FF_NO_ENTRIES.name, '#00FF0080'),
BadPixels.FF_GAIN_EVAL_ERROR.value |
BadPixels.FF_GAIN_DEVIATION.value: ('EVAL+DEV', '#DD00DD80'),
BadPixels.FF_GAIN_EVAL_ERROR.value |
BadPixels.FF_NO_ENTRIES.value: ('EVAL+NO_ENTRY', '#00DDDD80'),
BadPixels.FF_GAIN_DEVIATION.value |
BadPixels.FF_NO_ENTRIES.value: ('DEV+NO_ENTRY', '#DDDD0080'),
}
rebin = 32 if not high_res_badpix_3d else 2
gain = 0
for mod, data in mask_g.items():
plot_badpix_3d(data, cols, title=mod, rebin_fac=rebin)
```
%% Cell type:code id: tags:
``` python
if local_output:
ofile = "{}/agipd_gain_store_{}_modules_{}.h5".format(out_folder,
"_".join([str(r) for r in runs]),
"_".join([str(m) for m in modules]))
store_file = h5py.File(ofile, "w")
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
gainmap, entries, stds, sow = gain
store_file["/{}/Gain/0/data".format(qm)] = gains[...,0]
store_file["/{}/GainOffset/0/data".format(qm)] = gains[...,1]
store_file["/{}/Flat/0/data".format(qm)] = flatsff[qm]
store_file["/{}/Entries/0/data".format(qm)] = entires
store_file["/{}/BadPixels/0/data".format(qm)] = mask_g[qm]
store_file.close()
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))
```
%% Cell type:code id: tags:
``` python
if db_output:
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
gainmap, entries, stds, sow = gain
det = getattr(Detectors, dinstance)
device = getattr(det, qm)
# gains related
metadata = ConstantMetaData()
cgain = Constants.AGIPD.SlopesFF()
cgain.data = gains
metadata.calibration_constant = cgain
# set the operating condition
condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
# specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
metadata.send(cal_db_interface, timeout=300000)
# bad pixels
metadata = ConstantMetaData()
bp = Constants.AGIPD.BadPixelsFF()
bp.data = mask_g[qm]
metadata.calibration_constant = bp
# set the operating condition
condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
# specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
metadata.send(cal_db_interface, timeout=300000)
```
%% Cell type:markdown id: tags:
## Sanity check ##
Finally, we perform a correction of the data used to derive the gain constants with said constants. We expect an improvement in peak FWHM if the constants have been deduced correctly.
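For reference, the FWHM quoted below is derived from the fitted Gaussian width as $\mathrm{FWHM} = 2\sqrt{2\ln 2}\,\sigma \approx 2.355\,\sigma$ (the code uses the rounded factor 2.35), and the peak separation is given in units of $\sigma$:
``` python
import numpy as np

sigma = 12.0  # hypothetical fitted Gaussian width in ADU
print(2 * np.sqrt(2 * np.log(2)) * sigma)  # approx. 28.26, vs. 2.35 * 12.0 = 28.2
```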
%% Cell type:code id: tags:
``` python
def hist_single_module(fbase, runs, sequences, il_mode, profile, limit_trains, memory_cells, rawversion, instrument, inp):
channel, offset, thresholds, relgain, noise, pc = inp
gain, pks, std, gfunc = relgain
gains, errors, chisq, valid, max_dev, stats = gfunc
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
import copy
from XFELDetAna.util import env
env.iprofile = profile
sensor_size = [128, 512]
block_size = sensor_size
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
This is done via a continous wavelet transform (CWT), using a Ricker
wavelet.
Only high gain data is evaulated, and data larger than 50 ADU,
equivalent of roughly a 9 keV photon is ignored.
Two passes are executed: the first shift is accurate to 10 ADU and
will catch baseline shifts smaller then -2000 ADU. CWT is evaluated
for peaks of widths one, three and five time the noise.
The baseline is then shifted by the position of the summed CWT maximum.
In a second pass histogram is evaluated within a range of
+- 5*noise of the initial shift value, for peak of width noise.
Baseline shifts larger than the maximum threshold or positive shifts
larger 10 are discarded, and the original data is returned, otherwise
the shift corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
except Exception:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except Exception:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
# shift too large to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
def read_fun(filename, channel):
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size, offset, nCells=memory_cells, blockSize=block_size, gains=[0,1,2])
offset_cor.debug()
hist_calc = xcal.HistogramCalculator(sensor_size, bins=2000, range=(0, 2000), blockSize=block_size)
hist_calc.debug()
hist_calc_uncorr = xcal.HistogramCalculator(sensor_size, bins=2000, range=(0, 2000), blockSize=block_size)
hist_calc_uncorr.debug()
for run in runs:
for seq in sequences[:2]:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
# filter out cells without valid constants, then apply gain thresholding
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
hist_calc_uncorr.fill(d)
d = (d-gains[..., 1][...,None])/gains[..., 0][...,None]
#d = d/gains[..., 0][...,None]
hist_calc.fill(d)
h, e, c, _ = hist_calc.get()
hu = hist_calc_uncorr.get()
return h, e, c, hu[0]
inp = []
idx = 0
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], res_gain[idx], noise_g[qm][...,0], pc_g[qm][0,...]))
idx += 1
p = partial(hist_single_module, fbase, runs, sequences, IL_MODE, profile, limit_trains, memory_cells, rawversion, instrument)
#res = view.map_sync(p, inp)
res = list(map(p, inp))
```
%% Cell type:code id: tags:
``` python
from iminuit import Minuit
from iminuit.util import make_func_code, describe
from IPython.display import HTML, display
import tabulate
# fitting
par_ests = {}
par_ests["mu0"] = 0
par_ests["mu1"] = 50
par_ests["mu2"] = 100
par_ests["limit_mu0"] = [-5, 5]
par_ests["limit_mu1"] = [35, 65]
par_ests["limit_mu2"] = [100, 150]
par_ests["s0"] = 10
par_ests["s1"] = 10
par_ests["s2"] = 10
par_ests["limit_A0"] = [1e5, 1e12]
par_ests["limit_A1"] = [1e4, 1e10]
par_ests["limit_A2"] = [1e3, 1e10]
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 1
def gaussian3(x, mu0, s0, A0, mu1, s1, A1, mu2, s2, A2):
return (A0/np.sqrt(2*np.pi*s0**2)*np.exp(-0.5*((x-mu0)/s0)**2) +
A1/np.sqrt(2*np.pi*s1**2)*np.exp(-0.5*((x-mu1)/s1)**2) +
A2/np.sqrt(2*np.pi*s2**2)*np.exp(-0.5*((x-mu2)/s2)**2))
f_sig = describe(gaussian3)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x
self.y = y
self.err = err
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
d = []
y_range_max = 0
table = []
headers = ['Module',
'FWHM (cor.) [ADU]', 'Separation (cor.) [$\sigma$]',
'FWHM (uncor.) [ADU]', 'Separation (uncor.) [$\sigma$]',
'Improvement'
]
for i, r in enumerate(res):
qm = "Q{}M{}".format(i//4+1, i%4+1)
row = []
row.append(qm)
h, e, c, hu = r
d.append({
'x': c,
'y': h,
'drawstyle': 'steps-mid',
'label': '{}: corrected'.format(qm),
'marker': '.',
'color': 'blue',
})
idx = (h > 0) & np.isfinite(h)
h_non_zero = h[idx]
c_non_zero = c[idx]
par_ests["A0"] = np.float(h[np.argmin(abs(c-0))])
par_ests["A1"] = np.float(h[np.argmin(abs(c-50))])
par_ests["A2"] = np.float(h[np.argmin(abs(c-100))])
wrapped = _Chi2Functor(gaussian3, c_non_zero, h_non_zero,
np.sqrt(h_non_zero))
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
xt = np.arange(0, 200)
yt = gaussian3(xt, m.values['mu0'], m.values['s0'], m.values['A0'],
m.values['mu1'], m.values['s1'], m.values['A1'],
m.values['mu2'], m.values['s2'], m.values['A2'])
d.append({
'x': xt,
'y': yt,
'label': '{}: corrected (fit)'.format(qm),
'color': 'green',
'drawstyle': 'line',
'linewidth': 2
})
d.append({
'x': c,
'y': hu,
'drawstyle': 'steps-mid',
'label': '{}: uncorrected'.format(qm),
'marker': '.',
'color': 'grey',
'alpha': 0.5
})
row += [m.values['s1']*2.35, (m.values['mu1']-m.values['mu0'])/m.values['s1']]
idx = (hu > 0) & np.isfinite(hu)
h_non_zero = hu[idx]
c_non_zero = c[idx]
wrapped = _Chi2Functor(gaussian3, c_non_zero, h_non_zero,
np.sqrt(h_non_zero))
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
xt = np.arange(0, 200)
yt = gaussian3(xt, m.values['mu0'], m.values['s0'], m.values['A0'],
m.values['mu1'], m.values['s1'], m.values['A1'],
m.values['mu2'], m.values['s2'], m.values['A2'])
d.append({
'x': xt,
'y': yt,
'label': '{}: uncorrected (fit)'.format(qm),
'color': 'red',
'linewidth': 2
})
row += [m.values['s1']*2.35, (m.values['mu1']-m.values['mu0'])/m.values['s1']]
row.append("{:0.2f} %".format(100*(row[3]/row[1]-1)))
y_range_max = max(y_range_max, np.max(h[c>25])*1.5)
# output table
table.append(row)
fig = xana.simplePlot(d, y_log=False, figsize="2col",
aspect=2,
x_range=(0, 200),
legend='top-right-frame',
y_range=(0, y_range_max),
x_label='Energy (ADU)',
y_label='Counts')
display(HTML(tabulate.tabulate(table, tablefmt='html', headers=headers,
numalign="right", floatfmt="0.2f")))
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# Characterize AGIPD Pulse Capacitor Data #
Author: S. Hauf, Version 1.0
The following code characterizes AGIPD gain via data taken with the pulse capacitor source (PCS). The PCS allows scanning through the high and medium gains of AGIPD by successively increasing the number of charge pulses from an on-ASIC capacitor, thus increasing the charge a pixel sees in a given integration time.
Because induced charge does not originate from X-rays on the sensor, the gains evaluated here will later need to be rescaled with gains deduced from X-ray data.
PCS data is organized into multiple runs, as the on-ASIC current source cannot supply all pixels of a given module with charge at the same time. Hence, only certain pixel rows will have seen charge in a given image. These rows first need to be combined into single-module images again.
We then use a K-means clustering algorithm to identify components in the resulting per-pixel data series, corresponding to three general regions:
* a high gain slope
* a transition region, where gain switching occurs
* a medium gain slope.
The same regions are present in the gain-bit data and are used to deduce the switching threshold.
The resulting slopes are then fitted with a linear function, and with a combination of a linear and an exponential decay function, to determine the relative gains of the pixels with respect to the module. Additionally, we deduce bad pixel masks from the data. A sketch of the row-merging scheme and the threshold logic follows below.
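As a minimal sketch of the two concepts (the numbers are hypothetical): run `k` of the 8 PC runs supplies rows `8*r + (7-k)` in the top half and `64 + 8*r + k` in the bottom half of a module, matching the row-copy loop in the merge code below, and the switching threshold is taken as the midpoint between the gain-bit cluster means:
``` python
import numpy as np

# rows of a module that receive charge in PC run k (k = 0..7)
def injected_rows(k):
    top = [8*r + (7 - k) for r in range(8)]
    bottom = [64 + 8*r + k for r in range(8)]
    return top + bottom

print(injected_rows(0))  # rows 7, 15, ..., 63 and 64, 72, ..., 120

# switching threshold: midpoint of the high- and medium-gain
# cluster means of the gain-bit data (hypothetical values)
ga_high = np.array([5200., 5300.])
ga_medium = np.array([7900., 8100.])
print((ga_high.mean() + ga_medium.mean()) / 2)  # 6625.0
```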
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202030/p900138/raw/' # path to input data, required
modules = [-1] # modules to work on, required, range allowed
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/pc" # path to output to, required
runs = [466, 467, 468, 469, 470, 471, 472, 473] # runs to use, required, range allowed
n_sequences = 3 # number of sequence files, starting for 0 to evaluate
cluster_profile = "noDB" # The ipcluster profile to use#
local_output = True # output constants locally
db_output = False # output constants to database
bias_voltage = 300 # detector bias voltage
cal_db_interface = "tcp://max-exfl016:8019" # the database interface to use
mem_cells = 0. # number of memory cells used, use 0 to auto-derive
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
fit_hook = True # fit a hook function to medium gain slope
rawversion = 2 # RAW file format version
instrument = "SPB"
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
use_dir_creation_date = True
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00
gain_setting = 0.1 # gain setting can have value 0 or 1, Default=0.1 for no (None) gain-setting
karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control information
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
```
%% Cell type:code id: tags:
``` python
# imports, usually no need to change anything here
from datetime import datetime
from functools import partial
import os
import warnings
warnings.filterwarnings('ignore')
import dateutil.parser
import h5py
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from ipyparallel import Client
from cal_tools.agipdlib import get_acq_rate, get_gain_setting, get_num_cells
from cal_tools.enums import BadPixels
from cal_tools.influx import InfluxLogger
from cal_tools.plotting import plot_badpix_3d, show_overview
from cal_tools.tools import (gain_map_files, get_dir_creation_date, get_notebook_name,
parse_runs, run_prop_seq_from_path, send_to_db)
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
import XFELDetAna.xfelpyanatools as xana
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
view = Client(profile=cluster_profile)[:]
view.use_dill()
IL_MODE = interlaced
maxcells = mem_cells if not interlaced else mem_cells*2
cells = mem_cells
path_temp = in_folder+"/r{:04d}/"
image_name_temp = 'RAW-R{:04d}-AGIPD{:02d}-S{:05d}.h5'
seqs = n_sequences
print("Parameters are:")
print("Memory cells: {}/{}".format(cells, maxcells))
print("Runs: {}".format(runs))
print("Modules: {}".format(modules))
print("Sequences: {}".format(seqs))
print("Interlaced mode: {}".format(IL_MODE))
run, prop, seq = run_prop_seq_from_path(in_folder)
logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
notebook=get_notebook_name(), proposal=prop)
loc = None
if instrument == "SPB":
loc = "SPB_DET_AGIPD1M-1"
dinstance = "AGIPD1M1"
karabo_id_control = "SPB_IRU_AGIPD1M1"
else:
loc = "MID_DET_AGIPD1M-1"
dinstance = "AGIPD1M2"
karabo_id_control = "MID_EXP_AGIPD1M1"
print("Detector in use is {}".format(loc))
```
%% Cell type:markdown id: tags:
## Read in data and merge ##
The number of bursts in each sequence file is determined from the sequence files of the first module.
%% Cell type:code id: tags:
``` python
run = runs[0]
bursts_per_file = []
channel = 0
for seq in range(seqs):
fname = os.path.join(path_temp.format(run),
image_name_temp.format(run, channel, seq))
print('Reading ',fname)
if acq_rate == 0.:
acq_rate = get_acq_rate((fname, loc, channel))
print("Acquisition rate set from file: {} MHz".format(acq_rate))
if mem_cells == 0:
cells = get_num_cells(fname, loc, channel)
maxcells = cells
mem_cells = cells # avoid setting twice
print("Memory cells set from file: {}".format(cells))
f = h5py.File(fname, 'r', driver='core')
if rawversion == 2:
count = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
bursts_per_file.append(np.count_nonzero(count))
else:
status = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(loc, channel)])
bursts_per_file.append(np.count_nonzero(status != 0))
f.close()
bursts_per_file = np.array(bursts_per_file)
print("Bursts per sequence file are: {}".format(bursts_per_file))
# Define constant creation time.
if creation_time.strip() != "":
try:
creation_time = datetime.strptime(creation_time, '%Y-%m-%d %H:%M:%S.%f')
except Exception as e:
print(f"creation_time value error: {e}."
"Use same format as YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00/n")
creation_time = None
print("Given creation time wont be used.")
else:
creation_time = None
if not creation_time and use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
if creation_time:
print("Using {} as creation time".format(creation_time.isoformat()))
```
%% Cell type:code id: tags:
``` python
control_fname = f'{in_folder}/r{runs[0]:04d}/RAW-R{runs[0]:04d}-{karabo_da_control}-S00000.h5'
if "{" in h5path_ctrl:
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
print(f"Gain setting: {gain_setting}")
```
%% Cell type:code id: tags:
``` python
def read_and_merge_module_data(cells, path_temp, image_name_temp,
runs, seqs, il_mode, rawversion, instrument, channel):
import h5py
import numpy as np
import os
def cal_bursts_per_file(run, dseq=0):
bursts_per_file = []
channel = 0
for seq in range(dseq, seqs+dseq):
#print(run, channel, seq)
fname = os.path.join(path_temp.format(run),
image_name_temp.format(run, channel, seq))
#print('Reading ',fname)
with h5py.File(fname, 'r') as f:
if rawversion == 2:
count = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)][()])
bursts_per_file.append(np.count_nonzero(count))
del count
else:
status = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)][()])
bursts_per_file.append(np.count_nonzero(status != 0))
del status
if bursts_per_file[0] == 0:
return cal_bursts_per_file(run, dseq=dseq+1) # late start of daq
return np.array(bursts_per_file), dseq
#bursts_per_file = np.hstack([0, bursts_per_file])
bursts_total = np.max([np.sum(cal_bursts_per_file(run)[0]) for run in runs])
cfac = 2 if il_mode else 1
def read_raw_data_file(fname, channel, cells = cells, cells_tot = cells, bursts = 250,
skip_first_burst = True, first_burst_length = cells):
data = None
cellID_all = None
with h5py.File(fname, 'r') as f:
#print('Reading ',fname)
image_path_temp = 'INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data'.format(instrument, channel)
cellID_path_temp = 'INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId'.format(instrument, channel)
if rawversion == 2:
count = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
#print(first_index, last_index)
data = f[image_path_temp][first_index:last_index,...][()]
cellID_all = np.squeeze(f[cellID_path_temp][first_index:last_index,...][()])
data = data[cellID_all<cells, ...]
#bursts = int(data.shape[0]/adcells)
#print('Bursts: ', bursts)
analog = np.zeros((bursts - skip_first_burst, cells//cfac, 128, 512))
digital = np.zeros((bursts - skip_first_burst, cells//cfac, 128, 512))
cellID = np.zeros(( (bursts - skip_first_burst) * cells))
offset = skip_first_burst * first_burst_length
for b in range(min(bursts, data.shape[0]//cells-1) - skip_first_burst-1):
try:
analog[b, : cells//cfac, ...] = np.swapaxes(data[b * cells_tot + offset : b * cells_tot + cells + offset : cfac,
0, ...], -1, -2)
digital[b, : cells//cfac, ...] = np.swapaxes(data[b * cells_tot + cfac - 1 + skip_first_burst * first_burst_length :
b * cells_tot + cells + cfac - 1 + offset :cfac, cfac%2, ...], -1, -2)
cellID[ b * cells : (b + 1) * cells] = cellID_all[b * cells_tot + offset : b * cells_tot + cells + offset].flatten()
except Exception as e:
raise AttributeError("Failed to fill burst data: {}".format(e))
return {'analog': analog, 'digital': digital, 'cellID': cellID}
pc_data = {'analog': np.zeros((bursts_total, cells//cfac, 128, 512)),
'digital': np.zeros((bursts_total, cells//cfac, 128, 512)),
'cellID': np.zeros(((bursts_total) * cells))
}
pc_data_merged = {'analog': np.zeros((bursts_total, cells//cfac, 128, 512)),
'digital': np.zeros((bursts_total, cells//cfac, 128, 512)),
'cellID': np.zeros(((bursts_total) * cells))
}
for run_idx, run in enumerate(runs):
bursts_per_file, dseq = cal_bursts_per_file(run)
print("Run {}: bursts per file: {} -> {} total".format(run, bursts_per_file, np.sum(bursts_per_file)))
#Read files in
last_burst = 0
for seq in range(dseq, seqs+dseq):
fname = os.path.join(path_temp.format(run),
image_name_temp.format(run, channel, seq))
if seq-dseq == 0:
skip_first_burst = True
else:
skip_first_burst = False
bursts = bursts_per_file[seq-dseq]
try:
aa = read_raw_data_file(fname, channel, bursts = bursts,
skip_first_burst = skip_first_burst,
first_burst_length = cells)
pc_data['analog'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = aa['analog']
pc_data['digital'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = aa['digital']
pc_data['cellID'][last_burst * cells : (last_burst+bursts_per_file[seq-dseq]-skip_first_burst) * cells, ...] = aa['cellID']
except Exception as e:
print(e)
pc_data['analog'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = 0
pc_data['digital'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = 0
pc_data['cellID'][last_burst * cells : (last_burst+bursts_per_file[seq-dseq]-skip_first_burst) * cells, ...] = 0
finally:
last_burst += bursts_per_file[seq-dseq]-skip_first_burst
# Copy injected rows
for row_i in range(8):
try:
pc_data_merged['analog'][:,:,row_i * 8 + (7 - run_idx),:] = pc_data['analog'][:bursts_total,:cells//cfac,row_i * 8 + (7 - run_idx),:]
pc_data_merged['analog'][:,:,64 + row_i * 8 + run_idx ,:] = pc_data['analog'][:bursts_total,:cells//cfac, 64 + row_i * 8 + run_idx,:]
pc_data_merged['digital'][:,:,row_i * 8 + (7 - run_idx),:] = pc_data['digital'][:bursts_total,:cells//cfac,row_i * 8 + (7 - run_idx),:]
pc_data_merged['digital'][:,:,64 + row_i * 8 + run_idx ,:] = pc_data['digital'][:bursts_total,:cells//cfac, 64 + row_i * 8 + run_idx,:]
except Exception as e:
print(e)
#Check cellIDs
#Copy cellIDs of first run
if run_idx == 0:
pc_data_merged['cellID'][...] = pc_data['cellID'][...]
#Check cellIDs of all the other runs
#else:
# print('cellID difference:{}'.format(np.sum(pc_data_merged['cellID']-pc_data['cellID'])))
return pc_data_merged['analog'], pc_data_merged['digital'], pc_data_merged['cellID']
start = datetime.now()
p = partial(read_and_merge_module_data, maxcells, path_temp, image_name_temp,
runs, seqs, IL_MODE, rawversion, instrument)
# chunk this a bit, so that we don't overuse available memory
res = list(map(p, modules))
duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration)
logger.send()
```
%% Cell type:markdown id: tags:
## Slope clustering and Fitting ##
The following two cells contain the actual algorithm logic as well as a preview of a single pixel and memory cells visualizing the data and the concepts.
We start out with calculating an estimate of the slope in proximity of a given data value. This is done by calculating the slopes of a given value with 15 neighbours and averaging the result. Values are then clustered by these slopes into three regions via a K-means algorithm.
* for the first region a linear function is fitted to the data, determining the gain slope and offset for the high gain mode.
$$y = mx + b$$
* for the second and third region a composite function of the form:
$$y = A e^{-(x-O)/C} + mx + b$$
is fitted, covering both the transition region and the medium gain slope.
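As a small numerical illustration (arbitrary parameter values; `hook_fun` has the same form as defined in the fitting cell below), far above the switching point $O$ the exponential term dies off and the curve approaches the medium-gain line $mx + b$:
``` python
import numpy as np

def hook_fun(x, a, c, o, m, b):
    # exponential "hook" on top of the medium-gain line
    return a * np.exp(-(x - o) / c) + m * x + b

x = np.array([100., 110., 150., 300.])
print(hook_fun(x, a=500., c=10., o=100., m=0.6, b=40.))
print(0.6 * x + 40.)  # linear asymptote for comparison
```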
%% Cell type:code id: tags:
``` python
from sklearn.cluster import KMeans
from iminuit import Minuit
from iminuit.util import make_func_code, describe
def calc_m_cluster(x, y):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
k = KMeans(n_clusters=3)
k.fit(m.reshape(-1, 1))
ms = []
for lbl in np.unique(k.labels_):
xl = x[k.labels_ == lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[k.labels_ == lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, k.labels_, k.cluster_centers_
def rolling_window(a, window):
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
strides = a.strides + (a.strides[-1],)
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
mm = np.zeros_like(m)
mm[...] = np.nan
m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
reg1 = m > r1
reg2 = m < r2
reg3 = (m > r2) & (m < r3)
reg4 = ~(reg1 | reg2 | reg3)
labels = [reg1, reg2, reg3, reg4]
regions = np.zeros_like(x, np.float32) # float so edge samples can be marked invalid with NaN below
for r, lbl in enumerate(labels):
regions[lbl] = r
scan_range = 30
mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
regions[...] = np.nan
regions[scan_range//2:-scan_range//2+1] = mregions
labels = [regions == 0, regions == 1, regions == 2, regions == 3]
idx = np.arange(x.size)
maxlbl = x.size-1
for i in range(0, len(labels)-1):
nidx = labels[i+1]
if np.any(nidx):
maxlbl = np.max(idx[nidx])
cidx = idx > maxlbl
if np.any(cidx):
labels[i][cidx] = False
ms = []
for lbl in labels:
xl = x[lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, labels, None
def fit_data(fun, x, y, yerr, par_ests):
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 0
f_sig = describe(fun)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x[y != 0]
self.y = y[y != 0]
self.err = err[y != 0]
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
wrapped = _Chi2Functor(fun, x, y, yerr)
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
return m.values
def lin_fun(x, m, b):
return m*x+b
def hook_fun(x, a, c, o, m, b):
return a*np.exp(-(x-o)/c)+m*x+b
```
%% Cell type:code id: tags:
``` python
from cal_tools.tools import get_constant_from_db_and_time
offsets = {}
noises = {}
thresholds = {}
for mod, r in enumerate(res):
ii = modules[mod]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
det = getattr(Detectors, dinstance)
offset, when = get_constant_from_db_and_time(getattr(det, qm),
Constants.AGIPD.Offset(),
Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage, acquisition_rate=acq_rate,
gain_setting=gain_setting),
np.zeros((128, 512, mem_cells, 3)),
cal_db_interface, creation_time=creation_time)
print("Offset for {} was injected on {}".format(qm, when))
offsets[mod] = np.array(offset.data)
noise, when = get_constant_from_db_and_time(getattr(det, qm),
Constants.AGIPD.Noise(),
Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage, acquisition_rate=acq_rate,
gain_setting=gain_setting),
np.zeros((128, 512, mem_cells, 3)),
cal_db_interface, creation_time=creation_time)
print("Noise for {} was injected on {}".format(qm, when))
noises[mod] = np.array(noise.data)
threshold, when = get_constant_from_db_and_time(getattr(det, qm),
Constants.AGIPD.ThresholdsDark(),
Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage, acquisition_rate=acq_rate,
gain_setting=gain_setting),
np.zeros((128, 512, mem_cells, 3)),
cal_db_interface, creation_time=creation_time)
print("Threshold for {} was injected on {}".format(qm, when))
thresholds[mod] = np.array(threshold.data)
```
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range1 = [(0,16), (0,64)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38, 64, 128]#, 200, 249]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
from mpl_toolkits.axes_grid1 import ImageGrid
for mod, r in enumerate(res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
H = [0, 0, 0, 0]
ex, ey = None, None
offset = offsets[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
if x.shape[0] == 0:
continue
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
ymin = y.min()
for i, lbl in enumerate(labels):
if np.any(lbl):
#ym = y[lbl]-y[lbl].min()
if i == 0:
gain = 0
else:
gain = 1
ym = y[lbl] - offset[pix[0], pix[1], cell, gain]
#if i != 0:
# ym += y[labels[0]].max()-y[labels[0]].min()
h, ex, ey = np.histogram2d(x[lbl], ym, range=((0, 600), (-500, 6000)), bins=(300, 650))
H[i] += h
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
for i in range(3):
H[i][H[i]==0] = np.nan
ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)")
ax.set_xlabel("PC scan point (#)")
```
%% Cell type:markdown id: tags:
### Examples from Pixel Subset ###
The following is a visualization of the clustering and fitting for a subset of pixels. If the data significantly mismatches expectations, the clustering and fitting algorithms should fail for this subset:
* the first plot shows the clustering results for pixels which were successfully evaluated
* the second plot shows the clustering results for pixels which failed to evaluate
* the third plot shows the fits and fit residuals for the pixel clusters shown in the first plot
Non-smooth behaviour is an indication that you are erroneously processing interleaved data as non-interleaved (or vice versa), or have the wrong number of memory cells set.
We do this twice, for two different detector regions.
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range1 = [(250,254), (60,64)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
for mod, r in enumerate(res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
offset = offsets[mod]
noise = noises[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
for i, lbl in enumerate(labels):
if i == 0:
gain = 0
else:
gain = 1
d.append({'x': x[lbl],
'y': y[lbl] - offset[pix[0], pix[1], cell, gain],
'marker': markers[i],
'color': colors[i],
'linewidth': 0
})
#if ms[i] < 0: # slope separating two regions
# bound = np.min(x[lbl])
# bound_m = ms[i]
if labels[1].any():
bound = np.min(x[labels[1]])
bound_m = ms[1]
if bound is None: # fall back to the gain-bit data when no transition region was found
ya = ana[:,cell, pix[0], pix[1]][vidx]
msa, labels, centers = calc_m_cluster2(x, ya, 25, -10, 25)
if np.count_nonzero(labels[0]) > 0:
bound = np.min(x[labels[0]])
bound_m = ms[3]
else:
avg_g = np.nanmean(ya)
bound = np.max(x[y < avg_g])
bound_m = ms[3]
#print(bound)
# fit linear slope
if not np.isnan(bound_m):
xl = x[(x<bound)]
yl = y[(x<bound)] - offset[pix[0], pix[1], cell, 0]
parms = {'m': bound_m, 'b': np.min(yl)}
errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
fitted = fit_data(lin_fun, xl, yl, errors , parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.max(np.abs((yl-yf)/yl))
d3.append({'x': xl,
'y': yf,
'color': 'k',
'linewidth': 1,
'y2': (yf-yl)/errors
})
# fit hook slope
if fit_hook:
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xh = x[idx]
yh = y[idx] - offset[pix[0], pix[1], cell, 1]
if len(yh[yh > 0]) == 0:
break
parms = {'m': bound_m/10 if bound_m/10>0.3 else 0.5, 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5, 'o': bound-1}
parms["limit_m"] = [0.3, 1.0]
parms["limit_c"] = [1., 1000]
errors = np.ones(xh.shape)*noise[pix[0], pix[1], cell, 1]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
max_devh = np.max(np.abs((yh-yf)/yh))
#print(fitted)
d3.append({'x': xh,
'y': yf,
'color': 'red',
'linewidth': 1,
'y2': (yf-yh)/errors
})
x = np.arange(ana.shape[0])
y = ana[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
#ms, labels, centers = calc_m_cluster2(x, y, 25, -10, 25)
threshold = np.nan # remains NaN if either gain region is empty
if len(y[labels[0]]) != 0 and len(y[labels[2]]) != 0:
threshold = (np.mean(y[labels[0]])+np.mean(y[labels[2]]))/2
for i, lbl in enumerate(labels):
d2.append({'x': x[lbl],
'y': y[lbl],
'marker': markers[i],
'color': colors[i],
'lw': None
})
d2.append({'x': np.array([x[0], x[-1]]),
'y': np.ones(2)*threshold,
'color': 'k',
'lw': 1
})
#threshold = (np.min(y[x<bound]) + np.max(y[x>=bound]))/2
fig = xana.simplePlot(d, y_label="PC pixel signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot.png".format(out_folder, modules[mod]))
fig = xana.simplePlot(d2, y_label="PC gain signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot_gain.png".format(out_folder, modules[mod]))
fig = xana.simplePlot(d3, secondpanel=True, y_label="PC signal (ADU)", figsize='2col', aspect=2,
x_label="step #", y2_label="Residuals ($\sigma$)", y2_range=(-5,5))
fig.savefig("{}/module_{}_pixel_plot_fits.png".format(out_folder, modules[mod]))
```
%% Cell type:code id: tags:
``` python
test_pixels = []
tpix_range2 = [(96,128), (32,64)]
for i in range(*tpix_range2[0]):
for j in range(*tpix_range2[1]):
test_pixels.append((j,i))
for mod, r in enumerate(res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
offset = offsets[mod]
noise = noises[mod]
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
for i, lbl in enumerate(labels):
if i == 0:
gain = 0
else:
gain = 1
d.append({'x': x[lbl],
'y': y[lbl] - offset[pix[0], pix[1], cell, gain],
'marker': markers[i],
'color': colors[i],
'linewidth': 0
})
#if ms[i] < 0: # slope separating two regions
# bound = np.min(x[lbl])
# bound_m = ms[i]
if len(x[labels[1]]):
bound = np.min(x[labels[1]])
bound_m = ms[1]
# fit linear slope
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xl = x[(x<bound)]
yl = y[(x<bound)] - offset[pix[0], pix[1], cell, 0]
errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
parms = {'m': bound_m, 'b': np.min(yl)}
fitted = fit_data(lin_fun, xl, yl, errors, parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.max(np.abs((yl-yf)/yl))
xtt = np.arange(ana.shape[0])
ytt = ana[:,cell, pix[0], pix[1]]
vidx = (ytt > 1000) & np.isfinite(ytt)
xtt = xtt[vidx]
ytt = ytt[vidx]
#ms, labels, centers = calc_m_cluster2(x, y, 25, -10, 25)
threshold = np.nan # remains NaN (skipping the plots below) if either gain region is empty
if len(y[labels[0]]) != 0 and len(y[labels[2]]) != 0:
threshold = (np.mean(ytt[labels[0]])+np.mean(ytt[labels[2]]))/2
if threshold > 10000 or threshold < 4000:
d3.append({
'x': xl,
'y': yf,
'color': 'k',
'linewidth': 1,
'y2': (yf-yl)/errors
})
if bound is None:
ya = ana[:,cell, pix[0], pix[1]][vidx]
msa, labels, centers = calc_m_cluster2(x, ya, 25, -10, 25)
if np.count_nonzero(labels[0]) > 0:
bound = np.min(x[labels[0]])
bound_m = ms[3]
else:
avg_g = np.nanmean(ya)
bound = np.max(x[y < avg_g])
bound_m = ms[3]
# fit hook slope
try:
if fit_hook:
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xh = x[idx]
yh = y[idx] - offset[pix[0], pix[1], cell, 1]
if len(yh[yh > 0]) != 0: # yh must be defined before it can be checked
errors = np.ones(xh.shape)*noise[pix[0], pix[1], cell, 1]
parms = {
'm': np.abs(bound_m/10),
'b': np.min(yh[yh > 0]),
'a': np.max(yh),
'c': 5.,
'o': bound-1
}
parms["limit_m"] = [0.3, 1.0]
parms["limit_c"] = [1., 1000]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
max_devh = np.max(np.abs((yh-yf)/yh))
#print(fitted)
if threshold > 10000 or threshold < 4000 or fitted['m'] < 0.2:
d3.append({
'x': xh,
'y': yf,
'color': 'red',
'linewidth': 1,
'y2': (yf-yh)/errors
})
except Exception as e:
if "zero-size array" in str(e):
pass
else:
print(e)
if threshold > 10000 or threshold < 4000:
for i, lbl in enumerate(labels):
d2.append({
'x': xtt[lbl],
'y': ytt[lbl],
'marker': markers[i],
'color': colors[i],
'lw': None
})
d2.append({'x': np.array([xtt[0], xtt[-1]]),
'y': np.ones(2)*threshold,
'color': 'k',
'lw': 1
})
#threshold = (np.min(y[x<bound]) + np.max(y[x>=bound]))/2
fig = xana.simplePlot(d, y_label="PC pixel signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot_fail.png".format(out_folder, modules[mod]))
fig = xana.simplePlot(d2, y_label="PC gain signal (ADU)", figsize='2col', aspect=2,
x_label="step #")
fig.savefig("{}/module_{}_pixel_plot_gain_fail.png".format(out_folder, modules[mod]))
fig = xana.simplePlot(d3, secondpanel=True, y_label="PC signal (ADU)", figsize='2col', aspect=2,
x_label="step #", y2_label="Residuals ($\sigma$)", y2_range=(-5,5))
fig.savefig("{}/module_{}_pixel_plot_fits_fail.png".format(out_folder, modules[mod]))
```
%% Cell type:code id: tags:
``` python
# Here we perform the calculations in column-parallel for all modules
def calibrate_single_row(cells, fit_hook, inp):
from sklearn.cluster import KMeans
from iminuit import Minuit
from iminuit.util import make_func_code, describe
import numpy as np
yrd, yra, offset, noise = inp
def rolling_window(a, window):
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
strides = a.strides + (a.strides[-1],)
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
scan_range = 15
ms = np.zeros((x.shape[0], scan_range))
for i in range(scan_range):
xdiffs = x - np.roll(x, i+1)
ydiffs = y - np.roll(y, i+1)
m = ydiffs/xdiffs
ms[:,i] = m
m = np.mean(ms, axis=1)
mm = np.zeros_like(m)
mm[...] = np.nan
m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
reg1 = m > r1
reg2 = m < r2
reg3 = (m > r2) & (m < r3)
reg4 = ~(reg1 | reg2 | reg3)
labels = [reg1, reg2, reg3, reg4]
regions = np.zeros_like(x, np.float32) # float so edge samples can be marked invalid with NaN below
for r, lbl in enumerate(labels):
regions[lbl] = r
scan_range = 30
mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
regions[...] = np.nan
regions[scan_range//2:-scan_range//2+1] = mregions
labels = [regions == 0, regions == 1, regions == 2, regions == 3]
idx = np.arange(x.size)
maxlbl = x.size-1
for i in range(0, len(labels)-1):
nidx = labels[i+1]
if np.any(nidx):
maxlbl = np.max(idx[nidx])
cidx = idx > maxlbl
if np.any(cidx):
labels[i][cidx] = False
ms = []
for lbl in labels:
xl = x[lbl]
xd = np.reshape(xl, (len(xl), 1))
xdiff = xd - xd.transpose()
yl = y[lbl]
yd = np.reshape(yl, (len(yl), 1))
ydiff = yd - yd.transpose()
ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
return ms, labels, None
def fit_data(fun, x, y, yerr, par_ests):
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 0
f_sig = describe(fun)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x
self.y = y
self.err = err
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
wrapped = _Chi2Functor(fun, x, y, yerr)
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
return m.values
def lin_fun(x, m, b):
return m*x+b
def hook_fun(x, a, c, o, m, b):
return a*np.exp(-(x-o)/c)+m*x+b
# linear slope
ml = np.zeros(yrd.shape[1:])
bl = np.zeros(yrd.shape[1:])
devl = np.zeros(yrd.shape[1:])
ml[...] = np.nan
bl[...] = np.nan
devl[...] = np.nan
#hook function
mh = np.zeros(yrd.shape[1:])
bh = np.zeros(yrd.shape[1:])
ch = np.zeros(yrd.shape[1:])
oh = np.zeros(yrd.shape[1:])
ah = np.zeros(yrd.shape[1:])
devh = np.zeros(yrd.shape[1:])
dhm = np.zeros(yrd.shape[1:])
mh[...] = np.nan
bh[...] = np.nan
ch[...] = np.nan
oh[...] = np.nan
ah[...] = np.nan
devh[...] = np.nan
dhm[...] = np.nan
# threshold
thresh = np.zeros(list(yrd.shape[1:])+[3,])
thresh[...] = np.nan
failures = []
for col in range(yrd.shape[-1]):
try:
y = yrd[:,col]
x = np.arange(y.shape[0])
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = np.min(x[labels[1]])
bound_m = ms[1]
# fit linear slope
xl = x[x<bound]
yl = y[x<bound] - offset[col, 0]
errors = np.ones(xl.shape)*noise[col, 0]
parms = {'m': bound_m, 'b': np.min(yl)}
fitted = fit_data(lin_fun, xl, yl, errors, parms)
yf = lin_fun(xl, fitted['m'], fitted['b'])
max_devl = np.median(np.abs((yl-yf)/yl))
ml[col] = fitted['m']
bl[col] = fitted['b']
devl[col] = max_devl
#if np.any(labels[0]) and np.any(labels[2]):
#dhm[col] = y[labels[0]].max()-y[labels[2]].min()
dhml = lin_fun(bound, fitted['m'], fitted['b'])
# fit hook slope
if fit_hook:
idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
xh = x[idx]
yh = y[idx] - offset[col, 1]
errors = np.ones(xh.shape)*noise[col, 1]
parms = {'m': bound_m/10 if bound_m/10 > 0.3 else 0.5, 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5., 'o': bound-1}
parms["limit_m"] = [0.3, 1.0]
parms["limit_c"] = [1., 1000]
fitted = fit_data(hook_fun, xh, yh, errors, parms)
yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
max_devh = np.median(np.abs((yh-yf)/yh))
mh[col] = fitted['m']
bh[col] = fitted['b']
ah[col] = fitted['a']
oh[col] = fitted['o']
ch[col] = fitted['c']
devh[col] = max_devh
dhm[col] = bound #(dhml) - lin_fun(bound, fitted['m'], fitted['b'])
y = yra[:,col]
x = np.arange(y.shape[0])
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
threshold = (np.mean(y[labels[0]])+np.mean(y[labels[2]]))/2
thresh[col,0] = threshold
thresh[col,1] = np.mean(y[labels[0]])
thresh[col,2] = np.mean(y[labels[2]])
except Exception as e:
print(e)
failures.append((col, str(e)))
del yrd
del yra
return thresh, (ml, bl, devl), (mh, bh, ah, oh, ch, devh), failures, dhm
start = datetime.now()
fres = {}
failures = []
for i, r in enumerate(res):
offset = offsets[i]
noise = noises[i]
ii = modules[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
dig, ana, cellId = r
# linear slope
ml = np.zeros(dig.shape[1:])
bl = np.zeros(dig.shape[1:])
devl = np.zeros(dig.shape[1:])
#hook function
mh = np.zeros(dig.shape[1:])
bh = np.zeros(dig.shape[1:])
ch = np.zeros(dig.shape[1:])
oh = np.zeros(dig.shape[1:])
ah = np.zeros(dig.shape[1:])
devh = np.zeros(dig.shape[1:])
dhma = np.zeros(dig.shape[1:])
# threshold
thresh = np.zeros(list(dig.shape[1:]))
thresh_bounds = np.zeros(list(dig.shape[1:])+[2,])
for cell in range(dig.shape[1]):
inp = []
for j in range(dig.shape[2]):
inp.append((dig[:,cell,j,:], ana[:,cell,j,:], offset[j,:,cell,:], noise[j,:,cell,:]))
p = partial(calibrate_single_row, cells, fit_hook)
#print("Running {} tasks in parallel".format(len(inp)))
frs = view.map_sync(p, inp)
#frs = list(map(p, inp))
for j, fr in enumerate(frs):
threshr, lin, hook, fails, dhm = fr
mlr, blr, devlr = lin
mhr, bhr, ahr, ohr, chro, devhr = hook
failures.append(fails)
ml[cell,j,:] = mlr
bl[cell,j,:] = blr
devl[cell,j,:] = devlr
mh[cell,j,:] = mhr
bh[cell,j,:] = bhr
oh[cell,j,:] = ohr
ch[cell,j,:] = chro
ah[cell,j,:] = ahr
devh[cell,j,:] = devhr
dhma[cell,j,:] = dhm
thresh[cell,j,...] = threshr[...,0]
thresh_bounds[cell,j,...] = threshr[...,1:]
fres[qm] = {'ml': ml,
'bl': bl,
'devl': devl,
'tresh': thresh,
'tresh_bounds': thresh_bounds,
'dhm': dhma}
if fit_hook:
fres[qm].update({
'mh': mh,
'bh': bh,
'oh': oh,
'ch': ch,
'ah': ah,
'devh': devh,
})
duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration)
logger.send()
```
%% Cell type:markdown id: tags:
Results of the slope fitting from PC runs are distinguished on axis 0 by the following indices:
* 0: linear slope - $m$ value
* 1: linear slope - $b$ value
* 2: linear slope - deviation
* 3: hook function - $m$ value
* 4: hook function - $b$ value
* 5: hook function - $O$ value
* 6: hook function - $C$ value
* 7: hook function - $A$ value
* 8: hook function - deviation
%% Cell type:code id: tags:
``` python
def slope_dict_to_arr(d):
key_to_index = {
"ml": 0,
"bl": 1,
"devl": 2,
"mh": 3,
"bh": 4,
"oh": 5,
"ch": 6,
"ah": 7,
"devh": 8,
"tresh": 9,
}
arr = np.zeros([11]+list(d["ml"].shape), np.float32)
for key, item in d.items():
if key not in key_to_index:
continue
arr[key_to_index[key],...] = item
return arr
```
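%% Cell type:markdown id: tags:
As a quick illustration (hypothetical module key, not part of the pipeline), a single parameter plane can be recovered from the packed array via the index mapping above:
%% Cell type:code id: tags:
``` python
# Sketch only: assumes module "Q1M1" is present in fres.
if "Q1M1" in fres:
    arr = slope_dict_to_arr(fres["Q1M1"])
    ml_plane = arr[0]  # index 0 holds the linear slope "ml"
    print(ml_plane.shape)
```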
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
bad_pixels = OrderedDict()
for qm, data in fres.items():
mask = np.zeros(data['ml'].shape, np.uint32)
mask[(data['tresh'][...,0] < 50) | (data['tresh'][...,0] > 8500)] |= BadPixels.CI_GAIN_OF_OF_THRESHOLD.value
mask[(data['devl'] == 0)] |= BadPixels.CI_LINEAR_DEVIATION.value
mask[(np.abs(data['devl']) > 0.5)] |= BadPixels.CI_LINEAR_DEVIATION.value
mask[(~np.isfinite(data['devl']))] |= BadPixels.CI_EVAL_ERROR.value
bad_pixels[qm] = mask
```
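%% Cell type:markdown id: tags:
Since the mask is a bit field, the individual failure reasons can later be recovered with bitwise tests; a quick sketch:
%% Cell type:code id: tags:
``` python
for qm, mask in bad_pixels.items():
    n_eval_err = np.count_nonzero(mask & BadPixels.CI_EVAL_ERROR.value)
    print(f"{qm}: {n_eval_err} pixels flagged with CI_EVAL_ERROR")
```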
%% Cell type:code id: tags:
``` python
if local_output:
ofile = "{}/agipd_pc_store_{}_{}_{}.h5".format(out_folder, "_".join([str(run) for run in runs]), modules[0], modules[-1])
store_file = h5py.File(ofile, "w")
for qm, r in fres.items():
for key, item in r.items():
store_file["/{}/{}/0/data".format(qm, key)] = item
#arr = slope_dict_to_arr(r)
#store_file["/{}/SlopesPC/0/data".format(qm)] = arr
store_file["/{}/{}/0/data".format(qm, "BadPixelsPC")] = bad_pixels[qm]
store_file.close()
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))
```
%% Cell type:code id: tags:
``` python
md = None
if db_output:
    det = getattr(Detectors, dinstance)
    for qm, r in fres.items():
        for const in ["SlopesPC", "BadPixelsPC"]:
            dbconst = getattr(Constants.AGIPD, const)()
            if const == "SlopesPC":
                dbconst.data = slope_dict_to_arr(r)
            else:
                dbconst.data = bad_pixels[qm]
            # set the operating condition
            condition = Conditions.Dark.AGIPD(memory_cells=maxcells, bias_voltage=bias_voltage,
                                              acquisition_rate=acq_rate, gain_setting=gain_setting)
            md = send_to_db(getattr(det, qm), dbconst, condition, file_loc,
                            cal_db_interface, creation_time=creation_time)
            print(f'Constant {const} for module {qm} was injected to the calibration DB. '
                  f'Begin at: {md.calibration_constant_version.begin_at if md is not None else creation_time}')
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {maxcells}\n• bias_voltage: {bias_voltage}\n"
      f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
      f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Overview Plots ##
Each of the following plots represents one of the fit parameters of memory cell 4 on a module:
For the linear function of the high gain region
$$y = mx + b$$
* ml denotes the $m$ parameter
* bl denotes the $b$ parameter
* devl denotes the absolute relative deviation from linearity.
For the composite function of the medium gain and transition region
$$y = A*e^{-(x-O)/C}+mx+b$$
* oh denotes the $O$ parameter
* ch denotes the $C$ parameter
* mh denotes the $m$ parameter
* bh denotes the $b$ parameter
* devh denotes the absolute relative deviation from the linear part of the function.
Additionally, the thresholds and bad pixels (mask) are shown.
Finally, the red and white rectangles indicate the first and second pixel ranges.
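Below is a minimal sketch (illustrative parameter values, not fitted results) of how the exponential term of the composite function decays towards its linear part away from the switching point $O$:
%% Cell type:code id: tags:
``` python
import numpy as np

# Illustrative values only, not fitted results:
a, c, o, m, b = 800., 5., 40., 6., 4000.
x = np.arange(0., 100.)
hook = a*np.exp(-(x - o)/c) + m*x + b  # composite (medium-gain) model
lin = m*x + b                          # its linear asymptote
print(np.abs(hook - lin)[x > o + 5*c].max())  # small beyond ~5 decay lengths
```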
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
import matplotlib.patches as patches
cell_to_preview = min(59, mem_cells-1)
for module, data in fres.items():
fig = plt.figure(figsize=(20,20))
grid = AxesGrid(fig, 111,
nrows_ncols=(7 if fit_hook else 3, 2),
axes_pad=(0.9, 0.15),
label_mode="1",
share_all=True,
cbar_location="right",
cbar_mode="each",
cbar_size="7%",
cbar_pad="2%",
)
mask = bad_pixels[module]
i = 0
for key, citem in data.items():
item = citem.copy()
item[~np.isfinite(item)] = 0
med = np.nanmedian(item)
bound = 0.1
maxcnt = 10
if med < 0:
bound = -bound
while(np.count_nonzero((item < med-bound*med) | (item > med+bound*med))/item.size > 0.01):
bound *=2
maxcnt -= 1
if maxcnt < 0:
break
if "bounds" in key:
d = item[cell_to_preview,...,0]
im = grid[i].imshow(d, interpolation="nearest",
vmin=med-bound*med, vmax=med+bound*med)
else:
d = item[cell_to_preview,...]
im = grid[i].imshow(d, interpolation="nearest",
vmin=med-bound*med, vmax=med+bound*med)
cb = grid.cbar_axes[i].colorbar(im)
# axes coordinates are 0,0 is bottom left and 1,1 is upper right
x0, x1 = tpix_range1[0][0], tpix_range1[0][1]
y0, y1 = tpix_range1[1][0], tpix_range1[1][1]
p = patches.Rectangle(
(x0, y0), x1-x0, y1-y0, fill=False, color="red")
grid[i].add_patch(p)
x0, x1 = tpix_range2[0][0], tpix_range2[0][1]
y0, y1 = tpix_range2[1][0], tpix_range2[1][1]
p = patches.Rectangle(
(x0, y0), x1-x0, y1-y0, fill=False, color="white")
grid[i].add_patch(p)
grid[i].text(20, 50, key, color="w", fontsize=50)
i += 1
im = grid[-1].imshow(mask[cell_to_preview,...], interpolation="nearest",
vmin=0, vmax=1)
cb = grid.cbar_axes[-1].colorbar(im)
grid[-1].text(20, 50, "mask", color="w", fontsize=50)
fig.savefig("{}/module_{}_PC.png".format(out_folder, module))
```
%% Cell type:markdown id: tags:
### Memory Cell dependent behavior of thresholding ###
%% Cell type:code id: tags:
``` python
toplot = {"tresh": "Gain theshold (ADU)",
"ml": "Slope (linear)",
"bl": "Offset (linear) (ADU)"}
from matplotlib.colors import LogNorm, PowerNorm
for module, data in fres.items():
bins = 100
for typ, label in toplot.items():
r_hist = np.zeros((mem_cells, bins))
mask = bad_pixels[module]
thresh = data[typ]
hrange = [0.5*np.nanmedian(thresh), 1.5*np.nanmedian(thresh)]
if hrange[1] < hrange[0]:
hrange = hrange[::-1]
for c in range(mem_cells):
d = thresh[c,...]
h, e = np.histogram(d.flatten(), bins=bins, range=hrange)
r_hist[c, :] = h
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(111)
im = ax.imshow(r_hist[:,:].T[::-1,:], interpolation="nearest",
aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(r_hist)),
extent=[0, mem_cells, hrange[0], hrange[1]])
ax.set_xlabel("Memory cell")
ax.set_ylabel(label)
cb = fig.colorbar(im)
cb.set_label("Counts")
#fig.savefig("/gpfs/exfel/data/scratch/haufs/test/agipd_gain_threholds.pdf", bbox_inches="tight")
```
%% Cell type:markdown id: tags:
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2. This excludes isolated single bad pixels. Hence, any bad pixels spanning at least 2 pixels in the x/y-plane, or across at least two memory cells, are indicated. Colors encode the bad pixel type, or mixed type.
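The sketch below illustrates the rebinning idea with a hypothetical helper (the actual plotting is done by plot_badpix_3d): a coarse 2×2 cell is flagged if any contributing pixel is flagged.
%% Cell type:code id: tags:
``` python
def rebin_or(mask, fac=2):
    # Logical-OR rebinning of a 2D bad-pixel mask by a factor `fac`.
    h = (mask.shape[0] // fac) * fac
    w = (mask.shape[1] // fac) * fac
    m = mask[:h, :w].astype(bool)
    return m.reshape(h // fac, fac, w // fac, fac).any(axis=(1, 3))

demo = np.zeros((4, 4), np.uint32)
demo[1, 1] = BadPixels.CI_EVAL_ERROR.value
print(rebin_or(demo))  # the containing 2x2 cell is flagged
```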
%% Cell type:code id: tags:
``` python
cols = {BadPixels.CI_GAIN_OF_OF_THRESHOLD.value: (BadPixels.CI_GAIN_OF_OF_THRESHOLD.name, '#FF000080'),
BadPixels.CI_EVAL_ERROR.value: (BadPixels.CI_EVAL_ERROR.name, '#0000FF80'),
BadPixels.CI_GAIN_OF_OF_THRESHOLD.value | BadPixels.OFFSET_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
rebin = 2 if not high_res_badpix_3d else 1
gain = 0
for mod, data in bad_pixels.items():
plot_badpix_3d(np.moveaxis(data, 0, 2), cols, title=mod, rebin_fac=rebin, azim=60.)
```
%% Cell type:code id: tags:
``` python
one_photon = 55 # ADU
test_pixels = []
tpix_range1 = [(0,8), (0,8)]
for i in range(*tpix_range1[0]):
for j in range(*tpix_range1[1]):
test_pixels.append((j,i))
test_cells = [4, 38, 64, 128, 200, 249]
tcell = np.array(test_cells)
tcell = tcell[tcell < mem_cells]
if tcell.size == 0:
test_cells = [mem_cells-1]
else:
test_cells = tcell.tolist()
from mpl_toolkits.axes_grid1 import ImageGrid
for mod, r in enumerate(res):
dig, ana, cellId = r
d = []
d2 = []
d3 = []
H = [0, 0, 0, 0]
H2 = [0, 0, 0, 0]
Ha = [0, 0, 0, 0]
ii = modules[mod]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
cdata = fres[qm]
ex, ey, ea = None, None, None
medml = np.nanmean(cdata['ml'])
medmh = np.nanmean(cdata['mh'][cdata['mh']> 0.5])
offset = offsets[mod]
threshold = thresholds[mod]
medth = np.nanmean(threshold[...,0])
for pix in test_pixels:
for cell in test_cells:
color = np.random.rand(3,1)
x = np.arange(dig.shape[0])
y = dig[:,cell, pix[0], pix[1]]
a = ana[:,cell, pix[0], pix[1]]
vidx = (y > 1000) & np.isfinite(y)
x = x[vidx]
y = y[vidx]
a = a[vidx]
ms, labels, centers = calc_m_cluster2(x, y)
bound = None
bound_m = None
markers = ['o','.','x','v']
colors = ['b', 'r', 'g', 'k']
ymin = y.min()
amin = a[labels[2]].min()
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)
o = offset[pix[0], pix[1], cell, 0]
ym = (y[lbl]-o)/cm
elif i >= 1:
mh = cdata['mh'][cell, pix[0], pix[1]]
ml = cdata['ml'][cell, pix[0], pix[1]]
cml = ml/medml
cmh = mh/medmh
cm = medml/medmh
oh = cdata['bh'][cell, pix[0], pix[1]]
o = offset[pix[0], pix[1], cell, 1] + oh
ym = (y[lbl]-o)/cmh*cm
if i == 1:
ah = cdata['ah'][cell, pix[0], pix[1]]
ch = cdata['ch'][cell, pix[0], pix[1]]
ohh = cdata['oh'][cell, pix[0], pix[1]]
tx = ch * np.log(ah/(y[lbl]-o))+ohh
chook = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
ym -= chook
h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
H[i] += h
labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
for i, lbl in enumerate(labels):
if np.any(lbl):
if i == 0:
cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)
o = offset[pix[0], pix[1], cell, 0]
ym = (y[lbl]-o)/cm
elif i >= 1:
mh = cdata['mh'][cell, pix[0], pix[1]]
ml = cdata['ml'][cell, pix[0], pix[1]]
cml = ml/medml
cmh = mh/medmh
cm = medml/medmh
oh = cdata['bh'][cell, pix[0], pix[1]]
o = offset[pix[0], pix[1], cell, 1] + oh
ym = (y[lbl]-o)/cmh*cm
if i == 1:
ah = cdata['ah'][cell, pix[0], pix[1]]
ch = cdata['ch'][cell, pix[0], pix[1]]
ohh = cdata['oh'][cell, pix[0], pix[1]]
tx = ch * np.log(ah/(y[lbl]-o))+ohh
chook = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
idx = (a[lbl]-amin) < 0
ym[idx] -= chook[idx]
#ym = a[lbl]-amin
h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
H2[i] += h
labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
for i, lbl in enumerate(labels):
if np.any(lbl):
#if i == 0:
# amin = a[lbl].min()
#else:
# amin = a[labels[0]].min() #a[labels[1]].min()# /(threshold[pix[0], pix[1], cell,0]/medth)
am = a[lbl] - amin
h, ex, ea = np.histogram2d(x[lbl], am, range=((0, 600), (-100, 5000)), bins=(300, 400))
Ha[i] += h
fig = plt.figure(figsize=(10,15))
ax = fig.add_subplot(311)
for i in range(3):
H[i][H[i]==0] = np.nan
ax.imshow(H[0].T, origin="bottom", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H[1].T, origin="bottom", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(H[2].T, origin="bottom", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)")
ax.set_xlabel("PC scan point (#)")
x = np.arange(0, 600)
ideal = medml*x/one_photon
ax.plot(x, ideal, color='red')
ax.plot(x, ideal + np.sqrt(ideal), color='red')
ax.plot(x, ideal - np.sqrt(ideal), color='red')
ax = fig.add_subplot(312)
for i in range(2):
H2[i][H2[i]==0] = np.nan
ax.imshow(H2[0].T, origin="bottom", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
ax.imshow(H2[1].T, origin="bottom", extent=[ex[0], ex[-1], ey[0], ey[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD response (ADU)")
ax.set_xlabel("PC scan point (#)")
x = np.arange(0, 600)
ideal = medml*x/one_photon
ax.plot(x, ideal, color='red')
ax.plot(x, ideal + np.sqrt(ideal), color='red')
ax.plot(x, ideal - np.sqrt(ideal), color='red')
ax = fig.add_subplot(313)
for i in range(2):
Ha[i][Ha[i]==0] = np.nan
ax.imshow(Ha[0].T, origin="bottom", extent=[ex[0], ex[-1], ea[0], ea[-1]],
aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
#ax.imshow(Ha[1].T, origin="bottom", extent=[ex[0], ex[-1], ea[0], ea[-1]],
# aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
ax.imshow(Ha[1].T, origin="bottom", extent=[ex[0], ex[-1], ea[0], ea[-1]],
aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
ax.set_ylabel("AGIPD gain (ADU)")
ax.set_xlabel("PC scan point (#)")
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# DSSC Characterize Dark Images #
Author: S. Hauf, Version: 0.1
The following code analyzes a set of dark images taken with the DSSC detector to deduce detector offsets and noise. Data for the detector are taken in a single run; the DSSC does not acquire multiple gain stages.
The notebook explicitly does what pyDetLib provides in its offset calculation method for streaming data.
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/exp/SCS/202030/p900125/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/DSSC" # path to output to, required
sequences = [0] # sequence files to evaluate.
modules = [-1] # modules to run for
run = 222 # run number in which data was recorded, required
karabo_id = "SCS_DET_DSSC1M-1" # karabo id
karabo_da = ['-1'] # a list of data aggregator names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
use_dir_creation_date = True # use the dir creation date for determining the creation time
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # detector bias voltage
rawversion = 2 # RAW file format version
thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_noise_sigma = 5. # thresholds in terms of n sigma noise for noise deduced bad pixels
thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for noise deduced bad pixels
instrument = "SCS" # the instrument
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
```
%% Cell type:code id: tags:
``` python
# imports and things that do not usually need to be changed
import os
import warnings
from collections import OrderedDict
from datetime import datetime

warnings.filterwarnings('ignore')

import h5py
import matplotlib
import numpy as np

matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline

import tabulate
from IPython.display import Latex, Markdown, display
from ipyparallel import Client

from cal_tools.enums import BadPixels
from cal_tools.influx import InfluxLogger
from cal_tools.plotting import (create_constant_overview,
                                plot_badpix_3d, show_overview,
                                show_processed_modules)
from cal_tools.tools import (get_dir_creation_date, get_from_db,
                             get_notebook_name, get_random_db_interface,
                             map_gain_stages, parse_runs,
                             run_prop_seq_from_path,
                             save_const_to_h5, send_to_db)
from iCalibrationDB import Conditions, Constants, Detectors, Versions

# make sure a cluster is running with ipcluster start --n=32, give it a while to start
view = Client(profile=cluster_profile)[:]
view.use_dill()
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
gain_names = ['High', 'Medium', 'Low']
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ["DSSC{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = run
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
logger = InfluxLogger(detector="DSSC", instrument=instrument, mem_cells=mem_cells,
notebook=get_notebook_name(), proposal=prop)
dinstance = "DSSC1M1"
print(f"Detector in use is {karabo_id}")
cal_db_interface = get_random_db_interface(cal_db_interface)
```
%% Cell type:code id: tags:
``` python
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}/{max_cells}")
print("Runs: {}".format([ v for v in offset_runs.values()]))
print(f"Sequences: {sequences}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
file_loc = f'proposal:{prop} runs:{[ v for v in offset_runs.values()][0]}'
```
%% Cell type:markdown id: tags:
The following lines create a queue of files which will then be processed module-parallel, distinguishing between different gains.
%% Cell type:code id: tags:
``` python
# set everything up filewise
os.makedirs(out_folder, exist_ok=True)
gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
gain_mapped_files, total_sequences, total_file_size = gmf
print(f"Will process a total of {total_sequences} file.")
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data, and noise is the corresponding standard deviation.
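In essence, the per-cell statistics reduce to the following minimal sketch (assuming `im` has shape (x, y, frames) and `cellIds` has shape (frames,)); the characterize_module function below applies the same logic inside the parallelized file processing:
%% Cell type:code id: tags:
``` python
import numpy as np

def dark_stats(im, cellIds, n_cells):
    # Per-pixel, per-memory-cell median (offset) and std (noise).
    offset = np.zeros(im.shape[:2] + (n_cells,), np.float32)
    noise = np.zeros_like(offset)
    for cc in np.unique(cellIds[cellIds < n_cells]):
        sel = cellIds == cc
        offset[..., cc] = np.median(im[..., sel], axis=2)
        noise[..., cc] = np.std(im[..., sel], axis=2)
    return offset, noise
```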
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def characterize_module(cells, bp_thresh, rawversion, karabo_id, h5path, h5path_idx, inp):
import numpy as np
import copy
import h5py
from cal_tools.enums import BadPixels
from hashlib import blake2b
import struct
import binascii
def get_num_cells(fname, h5path):
with h5py.File(fname, "r") as f:
cells = f[f"{h5path}/cellId"][()]
maxcell = np.max(cells)
options = [100, 200, 400, 500, 600, 700, 800]
dists = np.array([(o-maxcell) for o in options])
dists[dists<0] = 10000 # assure to always go higher
return options[np.argmin(dists)]
filename, channel = inp
h5path = h5path.format(channel)
h5path_idx = h5path_idx.format(channel)
if cells == 0:
cells = get_num_cells(filename, h5path)
print(f"Using {cells} memory cells")
pulseid_checksum = None
thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile[f"{h5path_idx}/count"])
first = np.squeeze(infile[f"{h5path_idx}/first"])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
pulseids = infile[f"{h5path}/pulseId"][first_index:int(first[count != 0][1])]
bveto = blake2b(pulseids.data, digest_size=8)
pulseid_checksum = struct.unpack('d', binascii.unhexlify(bveto.hexdigest()))[0]
else:
status = np.squeeze(infile[f"{h5path_idx}/status"])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile[f"{h5path_idx}/last"])
first = np.squeeze(infile[f"{h5path_idx}/first"])
last_index = int(last[status != 0][-1]) + 1
first_index = int(first[status != 0][0])
im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])
infile.close()
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
mcells = cells
offset = np.zeros((im.shape[0], im.shape[1], mcells))
noise = np.zeros((im.shape[0], im.shape[1], mcells))
for cc in np.unique(cellIds[cellIds < mcells]):
cellidx = cellIds == cc
offset[...,cc] = np.median(im[..., cellidx], axis=2)
noise[...,cc] = np.std(im[..., cellidx], axis=2)
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
return offset, noise, bp, cells, pulseid_checksum
offset_g = OrderedDict()
noise_g = OrderedDict()
gain_g = OrderedDict()
badpix_g = OrderedDict()
gg = 0
start = datetime.now()
all_cells = []
checksums = {}
for gain, mapped_files in gain_mapped_files.items():
inp = []
dones = []
for i in modules:
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = mapped_files[qm].get()
print("Process file: ", fname_in)
dones.append(mapped_files[qm].empty())
else:
continue
inp.append((fname_in, i))
p = partial(characterize_module, max_cells,
(thresholds_offset_hard, thresholds_offset_sigma,
thresholds_noise_hard, thresholds_noise_sigma), rawversion, karabo_id, h5path, h5path_idx)
results = list(map(p, inp))
for ii, r in enumerate(results):
i = modules[ii]
offset, noise, bp, thiscell, pulseid_checksum = r
all_cells.append(thiscell)
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2]))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
checksums[qm] = pulseid_checksum
offset_g[qm][...] = offset
noise_g[qm][...] = noise
badpix_g[qm][...] = bp
gg +=1
duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration,
total_sequences=total_sequences,
filesize=total_file_size)
logger.send()
if len(all_cells) > 0:
max_cells = np.max(all_cells)
print(f"Using {max_cells} memory cells")
else:
raise ValueError("0 processed memory cells. No raw data available.")
```
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise"]
old_const = {}
old_mdata = {}
print('Retrieve pre-existing constants for comparison.')
detinst = getattr(Detectors, dinstance)
for qm in offset_g.keys():
device = getattr(detinst, qm)
for const in clist:
condition = Conditions.Dark.DSSC(memory_cells=max_cells,
bias_voltage=bias_voltage,
pulseid_checksum=checksums[qm])
data, mdata = get_from_db(device,
getattr(Constants.DSSC, const)(),
condition,
None,
cal_db_interface, creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[const] = data
if mdata is not None and data is not None:
time = mdata.calibration_constant_version.begin_at
old_mdata[const] = time.isoformat()
os.makedirs(f'{out_folder}/old/', exist_ok=True)
save_const_to_h5(mdata, f'{out_folder}/old/')
save_const_to_h5(device,
getattr(Constants.DSSC, const)(),
condition, data, file_loc, creation_time,
f'{out_folder}/old/')
else:
old_mdata[const] = "Not found"
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = f"Q{i//4+1}M{i%4+1}"
try:
res[qm] = {'Offset': offset_g[qm],
'Noise': noise_g[qm],
#TODO: No badpixelsdark, yet.
#'BadPixelsDark': badpix_g[qm]
}
except Exception as e:
print(f"Error: No constants for {qm}: {e}")
```
%% Cell type:code id: tags:
``` python
# Push the same constant two times with different conditions:
# once with the generated pulseID checksum for the offline calibration,
# and once without it for the online calibration, which doesn't have
# this pulseID checksum available yet.
md = None
for dont_use_pulseIds in [True, False]:
    for qm in res.keys():
        detinst = getattr(Detectors, dinstance)
        device = getattr(detinst, qm)
        for const in res[qm].keys():
            dconst = getattr(Constants.DSSC, const)()
            dconst.data = res[qm][const]
            pidsum = None if dont_use_pulseIds else checksums[qm]
            # set the operating condition
            condition = Conditions.Dark.DSSC(memory_cells=max_cells,
                                             bias_voltage=bias_voltage,
                                             pulseid_checksum=pidsum)
            if db_output:
                md = send_to_db(device, dconst, condition, file_loc,
                                cal_db_interface, creation_time=creation_time,
                                timeout=cal_db_timeout)
            if local_output:
                # Don't save the constant locally two times.
                if dont_use_pulseIds:
                    md = save_const_to_h5(device, dconst, condition,
                                          dconst.data, file_loc,
                                          creation_time, out_folder)
                    print(f"Calibration constant {const} is stored locally.\n")
    if not dont_use_pulseIds:
        print("Constants parameter conditions are:\n")
        print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
              f"• pulseid_checksum: {pidsum}\n"
              f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
mnames = []
for i in modules:
    qm = f"Q{i//4+1}M{i % 4+1}"
    mnames.append(qm)
display(Markdown(f'## Position of the modules {mnames} and their ASICs ##'))
show_processed_modules(dinstance=dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identifying potential effects on all memory cells, e.g. on the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring on the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:code id: tags:
``` python
cell = 9
gain = 0
out_folder = None
show_overview(res, cell, gain, out_folder=out_folder, infix="_{}".format(run))
```
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This excludes isolated single bad pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# set rebin_fac to 1 to avoid rebinning and thus
# keep the real bad pixel values (high resolution)
gain = 0
for mod, data in badpix_g.items():
plot_badpix_3d(data, cols, title=mod, rebin_fac=2)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", max_cells, entries=1)
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100, entries=1)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
bad_pixel_aggregate_g[m] = d.astype(bool).astype(np.float64)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, entries=1)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant"]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain']]
for qm in res.keys():
data = np.copy(res[qm][const])
if old_const[const] is not None:
dataold = np.copy(old_const[const])
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[const] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good and bad pixels ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# FastCCD Dark Characterization
Author: I. Klačková, S. Hauf, K. Setoodehnia and M. Cascella
The following notebook provides dark image analysis of the FastCCD detector.
Dark characterization evaluates offset and noise of the FastCCD detector, corrects the noise for Common Mode (CM), and defines bad pixels relative to the offset and CM corrected noise. Bad pixels are then excluded and the CM corrected noise is recalculated. The resulting offset and CM corrected noise maps, as well as the bad pixel map, are sent to the calibration database.
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # ipcluster profile to use
in_folder = "/gpfs/exfel/exp/SCS/201930/p900074/raw" # input folder, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/fastccd' # output folder, required
sequence = 0 # sequence file to use
run = 351 # which run to read data from, required
karabo_da = ['DA05'] # data aggregators
karabo_id = "SCS_CDIDET_FCCD2M" # karabo prefix of PNCCD devices
receiver_id = "FCCD" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DAQ/{}:daqOutput/data/image/pixels' # path to the data in the HDF5 file
h5path_t = '/CONTROL/{}/CTRL/LSLAN/inputA/crdg/value' # path to find temperature
h5path_cntrl = '/RUN/{}/DET/FCCD' # path to find control data
use_dir_creation_date = True # To be used to retrieve calibration constants later on (for database time derivation)
cal_db_interface = "tcp://max-exfl016:8020" # the calibration database interface to use
cal_db_timeout = 300000 # timeout on calibration database requests
db_output = False # Output constants to the calibration database
local_output = True # output constants locally
number_dark_frames = 0 # number of images to be used, if set to 0 all available images are used
# The two operation modes for FastCCD have fixed names which cannot be changed:
operation_mode = "FF" # FS stands for frame-store and FF for full-frame opeartion.
temp_limits = 5 # to find calibration constants later on, the sensor temperature is allowed to vary by 5 units
bad_pixel_offset_sigma = 5. # Any pixel whose offset is beyond 5 standard deviations, is a bad pixel
bad_pixel_noise_sigma = 5. # Any pixel whose noise is beyond 5 standard deviations, is a bad pixel
sigmaNoise = 5. # Any pixel whose signal exceeds 'sigmaNoise'*noiseCM (common mode corrected noise) will be masked
fix_temperature = 0. # Fixed operation temperature in Kelvins. If set to 0, mean value of the data file's temperature is
# used.
chunkSize = 100 # Number of images to read per chunk
cpuCores = 40 # Specifies the number of running cpu cores
commonModeAxis = 1 # Axis along which common mode will be calculated (0: along rows, 1: along columns)
run_parallel = True # For parallel computation
# According to our gain calibration using 55Fe source, we have the following conversion gains (e.g., 1 ADU = 6.3 e-):
ADU_to_electron_upper_hg = 6.3 # for upper hemisphere and high gain
ADU_to_electron_lower_hg = 6.4 # for lower hemisphere and high gain
ADU_to_electron_upper_mg = 23.4 # for upper hemisphere and medium gain
ADU_to_electron_lower_mg = 23.4 # for lower hemisphere and medium gain
ADU_to_electron_upper_lg = 49.3 # for upper hemisphere and low gain
ADU_to_electron_lower_lg = 47.3 # for lower hemisphere and low gain
```
%% Cell type:code id: tags:
``` python
# Required Packages:
import copy
import datetime
import os
import time
import warnings
warnings.filterwarnings('ignore')

import h5py
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from IPython.display import display, Markdown
from prettytable import PrettyTable

from cal_tools.enums import BadPixels
from cal_tools.tools import (get_dir_creation_date,
                             get_random_db_interface, save_const_to_h5)
from iCalibrationDB import (Conditions, ConstantMetaData, Constants,
                            Detectors, Versions)
from iCalibrationDB.detectors import DetectorTypes

from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from XFELDetAna.detectors.fastccd import readerh5 as fastccdreaderh5
from XFELDetAna.util import env
env.iprofile = cluster_profile

import XFELDetAna.xfelprofiler as xprof
profiler = xprof.Profiler()
profiler.disable()

from XFELDetAna.xfelreaders import ChunkReader
```
%% Cell type:code id: tags:
``` python
# Output Folder Creation:
if not os.path.exists(out_folder):
os.makedirs(out_folder)
```
%% Cell type:code id: tags:
``` python
# Number of Images:
def nImagesOrLimit(nImages, limit):
if limit == 0:
return nImages
else:
return min(nImages, limit)
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{}'.format(proposal, run)
```
%% Cell type:code id: tags:
``` python
# Detector Operation Mode, Calibration Database Settings, and Some Initial Run Parameters & Paths:
display(Markdown('### Initial Settings'))
if operation_mode == "FS":
x = 960 # rows of the FastCCD to analyze in FS mode
y = 960 # columns of the FastCCD to analyze in FS mode
print('\nYou are analyzing data in FS mode.')
else:
x = 1934 # rows of the FastCCD to analyze in FF mode
y = 960 # columns of the FastCCD to analyze in FF mode
print('\nYou are analyzing data in FF mode.')
ped_dir = "{}/r{:04d}".format(in_folder, run)
fp_name = path_template.format(run, karabo_da[0])
fp_path = '{}/{}'.format(ped_dir, fp_name)
filename = fp_path.format(sequence)
h5path = h5path.format(karabo_id, receiver_id)
h5path_t = h5path_t.format(karabo_id)
h5path_cntrl = h5path_cntrl.format(karabo_id)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
cal_db_interface = get_random_db_interface(cal_db_interface)
print('Calibration database interface: {}'.format(cal_db_interface))
print("Sending constants to the calibration database: {}".format(db_output))
print("HDF5 path to data: {}".format(h5path))
print("Run number: {}".format(run))
print("Reading data from: {}".format(filename))
if creation_time:
print("Using {} as creation time".format(creation_time.isoformat()))
```
%% Cell type:code id: tags:
``` python
# Reading Parameters such as Detector Bias, Gain, etc. from the Data:
memoryCells = 1 # FastCCD has 1 memory cell
sensorSize = [x, y]
blockSize = [sensorSize[0]//2, sensorSize[1]] # Sensor area will be analysed according to blocksize
xcal.defaultBlockSize = blockSize
nImages = fastccdreaderh5.getDataSize(filename, h5path)[0] # Total number of images to process
nImages = nImagesOrLimit(nImages, number_dark_frames)
profile = False
with h5py.File(filename, 'r') as f:
bias_voltage = int(f['{}/biasclock/bias/value'.format(h5path_cntrl)][0])
det_gain = int(f['{}/exposure/gain/value'.format(h5path_cntrl)][0])
integration_time = int(f['{}/exposure/exposure_time/value'.format(h5path_cntrl)][0])
temperature = np.mean(f[h5path_t])
temperature = round(temperature, 2)
```
%% Cell type:code id: tags:
``` python
# Printing the Parameters Read from the Data File:
display(Markdown('### Evaluated Parameters'))
print("Number of dark images to analyze:", nImages)
gain_dict = {
"high gain" : 8,
"medium gain" : 2,
"low gain" : 1,
"auto gain" : 0
}
for gain, value in gain_dict.items():
if det_gain == value:
gain_setting = gain
print("Bias voltage is {} V.".format(bias_voltage))
print("Detector gain is set to x{} ({}).".format(det_gain, gain_setting))
print("Detector integration time is set to {}".format(integration_time), 'ms.')
if fix_temperature != 0.:
print("Using a fixed temperature of {} K".format(fix_temperature))
else:
# This is needed while sending the
# calibration constant to the DB later
fix_temperature = temperature + 273.15
print("Temperature is not fixed.")
print("Mean temperature was {:0.2f} °C / {:0.2f} K".format(temperature, fix_temperature))
```
%% Cell type:code id: tags:
``` python
# Reading Files in Chunks:
# Chunk reader returns an iterator to access the data in the file within the ranges:
reader = ChunkReader(filename, fastccdreaderh5.readData, nImages, chunkSize, path = h5path, pixels_x = sensorSize[0],
pixels_y = sensorSize[1],)
```
%% Cell type:code id: tags:
``` python
# Calculator:
# noiseCal is a noise map calculator, which internally also produces a per-pixel mean map, i.e. an offset map:
noiseCal = xcal.NoiseCalculator(sensorSize, memoryCells, cores=cpuCores, blockSize=blockSize, runParallel=run_parallel)
```
%% Cell type:markdown id: tags:
### First Iteration
Characterization of dark images with the purpose of creating dark maps (offset, noise and bad pixel maps) is an iterative process. First, initial offset and noise maps are produced from the raw dark data.
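Note that the processing loop below first strips the two most significant bits of each 16-bit raw word, keeping only the 14 signal bits; a quick illustration of this masking:
%% Cell type:code id: tags:
``` python
import numpy as np

raw = np.array([0x4FA3, 0x8FA3, 0x0FA3], np.uint16)  # example raw words
adu = np.bitwise_and(raw, 0b0011111111111111)        # keep the lower 14 bits
print([hex(v) for v in adu])                         # all reduce to 0xfa3
```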
%% Cell type:code id: tags:
``` python
counter1 = 1 # Counts the chunks with data.shape[2] >= chunkSize.
counter2 = 0 # Counts the chunks with data.shape[2] < chunkSize.
chunkSize_new = 0 # See below
images = 0 # Running total of processed images
for data in reader.readChunks():
data = np.bitwise_and(data.astype(np.uint16), 0b0011111111111111).astype(np.float32)
dx = np.count_nonzero(data, axis=(0, 1))
data = data[:,:,dx != 0]
# Some sequences may have less than 500 frames in them. To find out how many images there are, we will temporarily
# change chunkSize to be the same as whatever number of frames the last chunk of data has:
if data.shape[2] < chunkSize:
chunkSize_new = data.shape[2]
print("Number of images are less than chunkSize. chunkSize is temporarily changed to {}."
.format(chunkSize_new))
images = images + chunkSize_new
counter2 += 1
else:
images = counter1*chunkSize + counter2*chunkSize_new
counter1 += 1
noiseCal.fill(data) # Filling calculators with data
chunkSize = 100 # resetting the chunkSize to its default value for the next sequence or data-chunk
print('A total of {} images were processed.'.format(images))
```
%% Cell type:code id: tags:
``` python
offsetMap = noiseCal.getOffset() # Producing offset map
noiseMap = noiseCal.get() # Producing noise map
noiseCal.reset() # Resetting noise calculator
print("Initial maps are created.")
```
%% Cell type:markdown id: tags:
### Offset and Noise Maps prior to Common Mode Correction
In the following, the histogram of the FastCCD offset, FastCCD offset map, as well as the initial uncorrected noise map are plotted:
%% Cell type:code id: tags:
``` python
#************** OFFSET MAP HISTOGRAM ***********#
ho, co = np.histogram(offsetMap.flatten(), bins=700) # ho = offset histogram; co = offset bin centers
do = {'x': co[:-1],
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'bars',
'color': 'cornflowerblue',
'label': 'Raw Signal (ADU)'
}
fig = xana.simplePlot(do, figsize='1col', aspect=1, x_label = 'Raw Signal (ADU)', y_label="Counts",
x_range = (3400, 4400), title = 'Offset Histogram')
#fig.savefig('Offset_Hist.svg', format='svg', dpi=1200, bbox_inches='tight')
t0 = PrettyTable()
t0.title = "Raw Signal"
t0.field_names = ["Mean", "Median", "Standard Deviation"]
t0.add_row(["{:0.3f} (ADU)".format(np.mean(data)), "{:0.3f} (ADU)".format(np.median(data)), "{:0.3f} (ADU)"
.format(np.std(data))])
print(t0,'\n')
#************** OffsetMAP *******************#
fig = xana.heatmapPlot(offsetMap[:,:,0], x_label='Column Number', y_label='Row Number', aspect=1,
x_range=(0, y), y_range=(0, x), vmin=3000, vmax=4300, lut_label='Offset (ADU)',
panel_x_label='Columns Stat (ADU)', panel_y_label='Rows Stat (ADU)',
panel_top_low_lim = 3000, panel_top_high_lim = 4500, panel_side_low_lim = 3000,
panel_side_high_lim = 5000, title = 'OffsetMap')
#fig.savefig('RawOffsetMap.pdf', format='pdf', dpi=400, bbox_inches='tight')
#************** Raw NoiseMAP *******************#
fig = xana.heatmapPlot(noiseMap[:,:,0], x_label='Column Number', y_label='Row Number', aspect=1,
lut_label='Uncorrected Noise (ADU)', x_range=(0, y),
y_range=(0, x), vmax=2*np.mean(noiseMap), panel_x_label='Columns Stat (ADU)',
panel_y_label='Rows Stat (ADU)', panel_top_low_lim = 0, panel_top_high_lim = 20,
panel_side_low_lim = 0, panel_side_high_lim = 50, title = 'Uncorrected NoiseMap')
#fig.savefig('RawNoiseMap.pdf', format='pdf', dpi=400, bbox_inches='tight')
```
%% Cell type:code id: tags:
``` python
# Offset Correction:
offsetCorrection = xcal.OffsetCorrection(sensorSize, offsetMap, nCells = memoryCells, cores=cpuCores, gains=None,
runParallel=run_parallel, blockSize=blockSize)
offsetCorrection.debug()
# Common Mode Correction:
# This is the new method subtracting the median of all pixels that are read out at the same time along a row:
cmCorrection = xcal.CommonModeCorrection([data.shape[0], data.shape[1]], [data.shape[0]//2, data.shape[1]],
commonModeAxis, parallel=run_parallel, dType=np.float32, stride=10,
noiseMap=noiseMap.astype(np.float32), minFrac=0)
cmCorrection.debug()
```
%% Cell type:code id: tags:
``` python
# Histogram Calculators:
# For offset corrected data:
histCalCorrected = xcal.HistogramCalculator(sensorSize, bins=400, range=[-200, 200], memoryCells=memoryCells,
cores=cpuCores, gains=None, blockSize=blockSize)
# For common mode corrected data:
histCalCMCorrected = xcal.HistogramCalculator(sensorSize, bins=400, range=[-200, 200], memoryCells=memoryCells,
cores=cpuCores, gains=None, blockSize=blockSize)
```
%% Cell type:markdown id: tags:
### Second Iteration
During the second iteration, the data are offset corrected and then common mode corrected to produce a common mode corrected noise map. The common mode correction is calculated by subtracting the median of all pixels that are read out at the same time along a row.
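A minimal sketch of that idea (the notebook itself uses xcal.CommonModeCorrection, which additionally takes the noise map, a stride and a minimum fraction into account):
%% Cell type:code id: tags:
``` python
import numpy as np

def cm_correct_rows(frame):
    # Subtract the per-row median of a single (rows, columns) frame.
    return frame - np.median(frame, axis=1, keepdims=True)
```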
%% Cell type:code id: tags:
``` python
counter1 = 1 # Counts the chunks with data.shape[2] >= chunkSize.
counter2 = 0 # Counts the chunks with data.shape[2] < chunkSize.
chunkSize_new = 0 # See below
images = 0 # Running total of processed images
for data in reader.readChunks():
data = np.bitwise_and(data.astype(np.uint16), 0b0011111111111111).astype(np.float32)
dx = np.count_nonzero(data, axis=(0, 1))
data = data[:,:,dx != 0]
# Some sequences may have less than 500 frames in them. To find out how many images there are, we will temporarily
# change chunkSize to be the same as whatever number of frames the last chunk of data has:
if data.shape[2] < chunkSize:
chunkSize_new = data.shape[2]
print("Number of images are less than chunkSize. chunkSize is temporarily changed to {}."
.format(chunkSize_new))
images = images + chunkSize_new
counter2 += 1
else:
images = counter1*chunkSize + counter2*chunkSize_new
counter1 += 1
data = offsetCorrection.correct(data) # Offset correction
offset_corr_data = copy.copy(data) # I am copying this so that I can have access to it in the table below
histCalCorrected.fill(data)
cellTable=np.zeros(data.shape[2], np.int32) # Common mode correction
data = cmCorrection.correct(data.astype(np.float32), cellTable=cellTable) # Common mode correction
histCalCMCorrected.fill(data)
noiseCal.fill(data) # Filling noise calculator with common mode (CM) corrected data
chunkSize = 100 # resetting the chunkSize to its default value for the next sequence or data-chunk
print('A total of {} images were processed.'.format(images))
print("Offset and common mode corrections are applied.")
```
%% Cell type:code id: tags:
``` python
noiseMapCM = noiseCal.get() # Produces CM corrected noise map
ho, eo, co, so = histCalCorrected.get()
hCM, eCM, cCM, sCM = histCalCMCorrected.get()
```
%% Cell type:code id: tags:
``` python
# We are copying these so that we can replot them later after the calculators are reset:
ho_second_trial = copy.copy(ho)
co_second_trial = copy.copy(co)
hCM_second_trial = copy.copy(hCM)
cCM_second_trial = copy.copy(cCM)
```
%% Cell type:markdown id: tags:
### Signal after Offset and Common Mode Corrections
Here, the offset corrected signal is compared to the common-mode corrected signal (in the form of binned histograms):
%% Cell type:code id: tags:
``` python
do = [{'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'color': 'cornflowerblue',
'label': 'Offset Corrected Signal'
},
{'x': cCM,
'y': hCM,
'y_err': np.sqrt(hCM[:]),
'drawstyle': 'steps-mid',
'color': 'red',
'label': 'Common Mode Corrected Signal'
}]
fig = xana.simplePlot(do, figsize='2col', aspect=1, x_label = 'Corrected Signal (ADU)', y_label="Counts",
x_range = (-20, 20), legend='top-right-frame-1col', title = 'Corrected Signal - 2nd Iteration')
#fig.savefig('Corrected_Signal_Hist_1.svg', format='svg', dpi=1200, bbox_inches='tight')
t0 = PrettyTable()
t0.title = "Comparison of the First Round of Corrections - Bad Pixels Not Excluded"
t0.field_names = ["After Offset Correction", "After Common Mode Correction"]
t0.add_row(["Mean: {:0.3f} (ADU)".format(np.mean(offset_corr_data)), "Mean: {:0.3f} (ADU)".format(np.mean(data))])
t0.add_row(["Median: {:0.3f} (ADU)".format(np.median(offset_corr_data)), "Median: {:0.3f} (ADU)"
.format(np.median(data))])
t0.add_row(["Standard Deviation: {:0.3f} (ADU)".format(np.std(offset_corr_data)), "Standard Deviation: {:0.3f} (ADU)"
.format(np.std(data))])
print(t0,'\n')
```
%% Cell type:markdown id: tags:
### Noise Map after Common Mode Correction
In the following, the effect of common mode correction on the noise is shown. Finally common mode corrected noise map (noiseMapCM) is displayed and compared to the initial uncorrected noise map:
%% Cell type:code id: tags:
``` python
#*****NOISE MAP HISTOGRAM FROM THE COMMON MODE CORRECTED DATA*******#
hn, cn = np.histogram(noiseMap.flatten(), bins=200, range=(0, 40)) # hn: histogram of noise, cn: bin centers for noise
hn_CM, cn_CM = np.histogram(noiseMapCM.flatten(), bins=200, range=(0, 40))
dn = [{'x': cn[:-1],
'y': hn,
#'y_err': np.sqrt(hn[:]),
'drawstyle': 'steps-mid',#'bars',
'color': 'blue',#'cornflowerblue',
'label': 'Uncorrected Noise'
},
{'x': cn_CM[:-1],
'y': hn_CM,
#'y_err': np.sqrt(hn_CM[:]),
'drawstyle': 'steps-mid',#'bars',
'color': 'crimson',#'red',#'cornflowerblue',
#'ecolor': 'crimson',
'label': 'Common Mode Corrected Noise'
}]
fig = xana.simplePlot(dn, figsize='2col', aspect=1, x_label = 'Noise (ADU)', y_label="Counts",
x_range=(0, 40), y_range=(0, 1e6), y_log=True, legend='top-center-frame-1col',
title = 'Noise Comparison')
#fig.savefig('Noise_CM_1_Hist.svg', format='svg', dpi=1200, bbox_inches='tight')
fig = xana.heatmapPlot(noiseMapCM[:,:,0], aspect=1, x_label='Column Number', y_label='Row Number',
lut_label='Common Mode Corrected Noise (ADU)', x_range=(0,y), y_range=(0,x),
vmax=2*np.mean(noiseMapCM), panel_top_low_lim = 0, panel_top_high_lim = 20,
panel_side_low_lim = 0, panel_side_high_lim = 50, title = 'Common Mode Corrected Noise',
panel_x_label='Columns Stat (ADU)', panel_y_label='Rows Stat (ADU)')
#fig.savefig('NoiseMapCM.pdf', format='pdf', dpi=400, bbox_inches='tight')
```
%% Cell type:code id: tags:
``` python
# Resetting the calculators so we can do a third iteration later:
noiseCal.reset()
histCalCorrected.reset()
histCalCMCorrected.reset()
cmCorrection.reset()
```
%% Cell type:markdown id: tags:
### Initial BadPixelMap
This is generated based on the offset and CM corrected noise maps:
%% Cell type:code id: tags:
``` python
bad_pixels = np.zeros(offsetMap.shape, np.uint32)
mnoffset = np.nanmedian(offsetMap)
stdoffset = np.nanstd(offsetMap)
bad_pixels[(offsetMap < mnoffset-bad_pixel_offset_sigma*stdoffset) |
(offsetMap > mnoffset+bad_pixel_offset_sigma*stdoffset)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
mnnoise = np.nanmedian(noiseMapCM)
stdnoise = np.nanstd(noiseMapCM)
bad_pixels[(noiseMapCM < mnnoise-bad_pixel_noise_sigma*stdnoise) |
(noiseMapCM > mnnoise+bad_pixel_noise_sigma*stdnoise)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
fig = xana.heatmapPlot(np.log2(bad_pixels[:,:,0]),aspect=1, x_label='Column Number', y_label='Row Number',
lut_label='2^(Assigned Value to Bad Pixels)', x_range=(0, y), y_range=(0, x),
title = 'Bad Pixels Map Excluding Non-Sensitive\n Areas in Middle of CCD',
panel_x_label= 'Columns Stat', panel_y_label='Rows Stat')
```
%% Cell type:markdown id: tags:
Here, we are adding the pixels in the hole (center of the FastCCD) as well as 4 rows in the center of the detector, which we call the overscan region:
%% Cell type:code id: tags:
``` python
def create_circular_mask(h, w, center=None, radius=None):
import numpy as np
import math
if center is None: # use the middle of the image
center = [int(w/2), int(h/2)]
if radius is None: # use the smallest distance between the center and image walls
radius = min(center[0], center[1], w-center[0], h-center[1])
Y, X = np.ogrid[:h, :w]
dist_from_center = np.sqrt((X - center[0])**2 + (Y-center[1])**2)
mask = dist_from_center < radius
return mask
```
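%% Cell type:markdown id: tags:
A quick self-check of the helper on toy dimensions:
%% Cell type:code id: tags:
``` python
m = create_circular_mask(8, 8, radius=3)
print(m.astype(int))  # ones mark pixels inside the circle
```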
%% Cell type:code id: tags:
``` python
mask = np.zeros(offsetMap.shape, np.uint32)
# Defining a circular mask + a rectangular mask (overscan) for the hole in the middle of the CCD:
h, w = (x, y)
hole_mask_bool = create_circular_mask(h-4, w, radius=61.5, center=(w//2, (h-4)//2))
hole_mask = np.zeros(hole_mask_bool.shape, np.uint32)
hole_mask[hole_mask_bool] = BadPixels.NON_SENSITIVE.value
overscan_mask = np.full((4, w), BadPixels.OVERSCAN.value)
mask[:,:,0] = np.insert(hole_mask, (h-4)//2, overscan_mask, 0)
# Assigning this masked area as bad pixels:
bad_pixels = np.bitwise_or(bad_pixels, mask)
fig = xana.heatmapPlot(np.log2(bad_pixels[:,:,0]),aspect=1, x_label='Column Number', y_label='Row Number',
lut_label='2^(Assigned Value to Bad Pixels)', x_range=(0, y), y_range=(0, x),
panel_top_low_lim = 0, panel_top_high_lim = 20, panel_side_low_lim = 0, panel_side_high_lim = 20,
title = 'Bad Pixels Map Including Non-Sensitive\n Areas in Middle of CCD',
panel_x_label='Columns Stat', panel_y_label='Rows Stat', vmax=20)
#fig.savefig('BadPixelMap_1.svg', format='svg', dpi=1200, bbox_inches='tight')
```
%% Cell type:markdown id: tags:
### Third Iteration
During the third iteration, the bad pixel map is applied to the data: bad pixels are masked. Offset and common mode corrections are applied once again to the data, which now have bad pixels excluded, to produce a common mode corrected noise map:
%% Cell type:code id: tags:
``` python
# bad_pixels is an array of shape (1934, 960, 1) filled with zeros, except at the indices of the actual bad pixels,
# whose values are set to 2 (2^1: BadPixels.OFFSET_OUT_OF_THRESHOLD.value),
# 262144 (2^18: BadPixels.OVERSCAN.value), or 524288 (2^19: BadPixels.NON_SENSITIVE.value). These indices can be
# found using np.argwhere(bad_pixels != 0)
event_threshold = sigmaNoise*np.median(noiseMapCM) # for exclusion of possible cosmic ray events
noiseCal.setBadPixelMask(bad_pixels != 0)
```
%% Cell type:code id: tags:
``` python
counter1 = 1 # counts the chunks that have a full chunkSize of frames
counter2 = 0 # counts the chunks that have fewer than chunkSize frames
chunkSize_new = 0 # see below
for data in reader.readChunks():
#data = data.astype(np.float32)
data = np.bitwise_and(data.astype(np.uint16), 0b0011111111111111).astype(np.float32)
dx = np.count_nonzero(data, axis=(0, 1))
data = data[:,:,dx != 0]
# Some sequences may have fewer frames than chunkSize. To keep the image count correct, chunkSize is temporarily
# set to the number of frames in the last chunk of data:
if data.shape[2] < chunkSize:
chunkSize_new = data.shape[2]
print("Number of images is less than chunkSize. chunkSize is temporarily changed to {}."
.format(chunkSize_new))
images = images + chunkSize_new
counter2 += 1
else:
images = counter1*chunkSize + counter2*chunkSize_new
counter1 += 1
data_copy = offsetCorrection.correct(copy.copy(data))
cellTable=np.zeros(data_copy.shape[2], np.int32)
data_copy = cmCorrection.correct(data_copy.astype(np.float32), cellTable=cellTable)
data[data_copy > event_threshold] = np.nan # cosmic rays
data = np.ma.MaskedArray(data, np.isnan(data), fill_value=0) # masking cosmics, the default fill_value is 1e+20
data = offsetCorrection.correct(data)
offset_corr_data2 = copy.copy(data) # kept so it can be used for the statistics table below
histCalCorrected.fill(data)
cellTable=np.zeros(data.shape[2], np.int32)
data = cmCorrection.correct(data.astype(np.float32), cellTable=cellTable)
histCalCMCorrected.fill(data)
noiseCal.fill(data)
chunkSize = 100 # resetting the chunkSize to its default value for the next sequence or data-chunk
print('A total of {} images were processed.'.format(images))
print("Final iteration is performed.")
```
%% Cell type:code id: tags:
``` python
noiseMapCM_2nd = noiseCal.get().filled(0) # the masked pixels are filled with zero
ho2, eo2, co2, so2 = histCalCorrected.get()
hCM2, eCM2, cCM2, sCM2 = histCalCMCorrected.get()
```
%% Cell type:markdown id: tags:
### Plots of the Final Results
The following plot and table compare the offset and common mode corrected signal with and without the bad pixels:
%% Cell type:code id: tags:
``` python
do_Final = [{'x': co_second_trial,
'y': ho_second_trial,
'y_err': np.sqrt(ho_second_trial[:]),
'drawstyle': 'steps-mid',
'color': 'blue',#'cornflowerblue',
'errorstyle': 'bars',
'label': 'Offset Corrected Signal, Bad Pixels Included - 2nd Trial'
},
{'x': cCM_second_trial,
'y': hCM_second_trial,
'y_err': np.sqrt(hCM_second_trial[:]),
'drawstyle': 'steps-mid',
'color': 'red',
'errorstyle': 'bars',
'ecolor': 'crimson',
'label': 'Common Mode Corrected Signal, Bad Pixels Included - 2nd Trial'
},
{'x': co2,
'y': ho2,
'y_err': np.sqrt(ho2[:]),
'drawstyle': 'steps-mid',
'color': 'black', #'cornflowerblue',
'errorstyle': 'bars',
'label': 'Offset Corrected Signal, Bad Pixels Excluded - 3rd Trial'
},
{'x': cCM2,
'y': hCM2,
'y_err': np.sqrt(hCM2[:]),
'drawstyle': 'steps-mid',
'color': 'orange', #'cornflowerblue',
'errorstyle': 'bars',
'label': 'Common Mode Corrected Signal, Bad Pixels Excluded - 3rd Trial'
}]
fig = xana.simplePlot(do_Final, figsize='2col', aspect=1, x_label = 'Corrected Signal (ADU)',
y_label="Counts (Logarithmic Scale)", y_log=True, x_range=(-40, 40),
legend='bottom-left-frame-1col', title = 'Comparison of Corrected Signal')
#fig.savefig('Corrected_Signal_Hist_2.svg', format='svg', dpi=1200, bbox_inches='tight')
# offset_corr_data2 and data may contain NaNs (masked cosmics), so the nan-aware statistics functions are used:
t0 = PrettyTable()
t0.title = "Comparison of the Second Round of Corrections - Bad Pixels Excluded"
t0.field_names = ["After Offset Correction", "After Common Mode Correction"]
t0.add_row(["Mean: {:0.3f} (ADU)".format(np.nanmean(offset_corr_data2)), "Mean: {:0.3f} (ADU)".format(np.nanmean(data))])
t0.add_row(["Median: {:0.3f} (ADU)".format(np.nanmedian(offset_corr_data2)), "Median: {:0.3f} (ADU)"
.format(np.nanmedian(data))])
t0.add_row(["Standard Deviation: {:0.3f} (ADU)".format(np.nanstd(offset_corr_data2)),
"Standard Deviation: {:0.3f} (ADU)".format(np.nanstd(data))])
print(t0,'\n')
```
%% Cell type:markdown id: tags:
### Final NoiseMap
The effect of excluding bad pixels on the common mode corrected noise is shown below. Finally, the common mode corrected noise map with bad pixels excluded (noiseMapCM_2nd) is displayed:
%% Cell type:code id: tags:
``` python
#*****NOISE MAP HISTOGRAM FROM THE COMMON MODE CORRECTED DATA*******#
hn_CM2, cn_CM2 = np.histogram(noiseMapCM_2nd.flatten(), bins=200, range=(0, 40))
dn2 = [{'x': cn[:-1],
'y': hn,
#'y_err': np.sqrt(hn[:]),
'drawstyle': 'steps-mid',#'bars',
'color': 'blue', #'cornflowerblue',
'label': 'Uncorrected Noise'
},
{'x': cn_CM[:-1],
'y': hn_CM,
#'y_err': np.sqrt(hn_CM[:]),
'drawstyle': 'steps-mid',
'color': 'red',
#'ecolor': 'crimson',
'label': 'Common Mode Corrected Noise prior to Bad Pixels Exclusion'
},
{'x': cn_CM2[:-1],
'y': hn_CM2,
#'y_err': np.sqrt(hn_CM2[:]),
'drawstyle': 'steps-mid',
'color': 'black', #'cornflowerblue',
'label': 'Common Mode Corrected Noise after Bad Pixels Exclusion'
}]
fig = xana.simplePlot(dn2, figsize='2col', aspect = 1, x_label = 'Noise (ADU)', y_label="Counts", y_log=True,
x_range=(0, 40), y_range=(0, 1e6), legend='top-right-frame-1col', title = 'Final Noise Comparison')
#fig.savefig('Noise_Hist_2.svg', format='svg', dpi=1200, bbox_inches='tight')
fig = xana.heatmapPlot(np.log2(noiseMapCM_2nd[:,:,0]), aspect=1, x_label='Column Number', y_label='Row Number',
lut_label='Noise (ADU)', x_range=(0, y), y_range=(0, x), vmax=2*np.mean(noiseMapCM_2nd),
title = 'Final Common Mode Corrected Noise\n (Bad Pixels Excluded)',
panel_x_label='Columns Stat (ADU)', panel_y_label='Rows Stat (ADU)')
#fig.savefig('NoiseMapCM_2nd.pdf', format='pdf', dpi=400, bbox_inches='tight')
```
%% Cell type:markdown id: tags:
### Final Bad Pixel Map
Lastly, the final bad pixel map is generated based on the OffsetMap and the noiseMapCM_2nd (common mode corrected noise after exclusion of the initial bad pixels):
%% Cell type:code id: tags:
``` python
bad_pixels = np.zeros(offsetMap.shape, np.uint32)
mnoffset = np.nanmedian(offsetMap)
stdoffset = np.nanstd(offsetMap)
bad_pixels[(offsetMap < mnoffset-bad_pixel_offset_sigma*stdoffset) |
(offsetMap > mnoffset+bad_pixel_offset_sigma*stdoffset)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
mnnoise = np.nanmedian(noiseMapCM_2nd)
stdnoise = np.nanstd(noiseMapCM_2nd)
bad_pixels[(noiseMapCM_2nd < mnnoise-bad_pixel_noise_sigma*stdnoise) |
(noiseMapCM_2nd > mnnoise+bad_pixel_noise_sigma*stdnoise)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
bad_pixels = np.bitwise_or(bad_pixels, mask)
fig = xana.heatmapPlot(np.log2(bad_pixels[:,:,0]),aspect=1, x_label='Column Number', y_label='Row Number',
lut_label='2^(Assigned Value to Bad Pixels)', x_range=(0, y), y_range=(0, x),
panel_top_low_lim = 0, panel_top_high_lim = 20, panel_side_low_lim = 0, panel_side_high_lim = 20,
title = 'Final Bad Pixels Map', panel_x_label='Columns Stat',
panel_y_label='Rows Stat', vmax=20)
#fig.savefig('BadPixelMap_2.svg', format='svg', dpi=1200, bbox_inches='tight')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Statistics on the Bad Pixels'))
num_bad_pixels = np.count_nonzero(bad_pixels)
num_all_pixels = x*y
percentage_bad_pixels = num_bad_pixels*100/num_all_pixels
print("Number of bad pixels: {:0.0f}, i.e. {:0.2f}% of all pixels".format(num_bad_pixels, percentage_bad_pixels))
```
%% Cell type:markdown id: tags:
### Electronic Noise
According to Table 6.1 (page 80) of Ivana Klačková's master's thesis, "Conversion gain for the FastCCD high gain is: lower hemisphere = 6.2e-/ADU and upper hemisphere = 6.1e-/ADU." We also know that the high gain/medium gain and high gain/low gain ratios are 4 and 8, respectively, since high gain = x8, medium gain = x2 and low gain = x1. We do not currently (October 2019) know the conversion gains for the FastCCD medium and low gains in electrons. Therefore, we use those of the high gain (in both hemispheres) together with the gain ratios to convert the noise in ADU to electrons.
The following tables present the noise along the lower hemisphere, the upper hemisphere, and the entire FastCCD detector at different stages. The values in the first table (in ADU and e-) are the mean of the noise per pixel, where noise refers to the initial uncorrected noise, the CM corrected noise after the second trial (including bad pixels), and the CM corrected noise after the third trial (excluding bad pixels).
The values in the second table (in electrons) are the standard deviation of the noise per pixel.
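%% Cell type:markdown id: tags:
As a minimal worked example of this conversion (assuming the thesis values quoted above; the notebook defines the corresponding ADU_to_electron variables in an earlier cell), the medium and low gain factors follow from the high gain ones by multiplying with the gain ratios, since less amplification means more electrons per ADU:
%% Cell type:code id: tags:
``` python
ADU_to_electron_upper_hg = 6.1 # e-/ADU, upper hemisphere, high gain
ADU_to_electron_lower_hg = 6.2 # e-/ADU, lower hemisphere, high gain
# high gain/medium gain = 4 and high gain/low gain = 8:
ADU_to_electron_upper_mg = 4*ADU_to_electron_upper_hg # 24.4 e-/ADU
ADU_to_electron_lower_mg = 4*ADU_to_electron_lower_hg # 24.8 e-/ADU
ADU_to_electron_upper_lg = 8*ADU_to_electron_upper_hg # 48.8 e-/ADU
ADU_to_electron_lower_lg = 8*ADU_to_electron_lower_hg # 49.6 e-/ADU
```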
%% Cell type:code id: tags:
``` python
# noiseMap refers to the initial uncorrected noise, noiseMapCM refers to the common mode corrected noise with
# inclusion of bad pixels, and noiseMapCM_2nd refers to the common mode corrected noise without bad pixels:
ADU_to_electron_hg = (ADU_to_electron_upper_hg + ADU_to_electron_lower_hg)/2 # average over the entire CCD, high gain
ADU_to_electron_mg = (ADU_to_electron_upper_mg + ADU_to_electron_lower_mg)/2 # average over the entire CCD, medium gain
ADU_to_electron_lg = (ADU_to_electron_upper_lg + ADU_to_electron_lower_lg)/2 # average over the entire CCD, low gain
```
%% Cell type:code id: tags:
``` python
# Pick the ADU-to-electron conversion factors matching the detector gain setting:
if det_gain == gain_dict["low gain"]:
ADU_to_electron = ADU_to_electron_lg
ADU_to_electron_upper = ADU_to_electron_upper_lg
ADU_to_electron_lower = ADU_to_electron_lower_lg
elif det_gain == gain_dict["medium gain"]:
ADU_to_electron = ADU_to_electron_mg
ADU_to_electron_upper = ADU_to_electron_upper_mg
ADU_to_electron_lower = ADU_to_electron_lower_mg
else: # Here, we assume the auto gain and high gain conversions from ADU to electrons are the same.
ADU_to_electron = ADU_to_electron_hg
ADU_to_electron_upper = ADU_to_electron_upper_hg
ADU_to_electron_lower = ADU_to_electron_lower_hg
print("Abbreviations:")
print(" - ED = Entire Detector;\n - LH: Lower Hemisphere;\n - UH: Upper Hemisphere;")
print(" - CM Noise: Common Mode Corrected Noise;")
print(" - BP: Bad Pixels\n")
t0 = PrettyTable()
t0.title = "Averages of Noise per Pixel"
t0.field_names = ["Uncorrected Noise", "CM Noise, BP Incl.", "CM Noise, BP Excl."]
t0.add_row(["ED: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMap), np.mean(noiseMap)*ADU_to_electron),
"ED: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMapCM), np.mean(noiseMapCM)*ADU_to_electron),
"ED: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMapCM_2nd), np.mean(noiseMapCM_2nd)*ADU_to_electron)])
t0.add_row(["LH: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMap[:x//2,:]),
np.mean(noiseMap[:x//2,:])*ADU_to_electron_lower),
"LH: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMapCM[:x//2,:]),
np.mean(noiseMapCM[:x//2,:])*ADU_to_electron_lower),
"LH: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMapCM_2nd[:x//2,:]),
np.mean(noiseMapCM_2nd[:x//2,:])*ADU_to_electron_lower)])
t0.add_row(["UH: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMap[x//2:,:]),
np.mean(noiseMap[x//2:,:])*ADU_to_electron_upper),
"UH: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMapCM[x//2:,:]),
np.mean(noiseMapCM[x//2:,:])*ADU_to_electron_upper),
"UH: {:0.2f} ADU = {:0.2f} e-".format(np.mean(noiseMapCM_2nd[x//2:,:]),
np.mean(noiseMapCM_2nd[x//2:,:])*ADU_to_electron_upper)])
print(t0,'\n')
t1 = PrettyTable()
t1.title = "Standard Deviations of Noise per Pixel"
t1.field_names = ["Uncorrected Noise", "CM Noise, BP Incl.", "CM Noise, BP Excl."]
t1.add_row(["ED: {:0.2f} e-".format(np.std(noiseMap)*ADU_to_electron),
"ED: {:0.2f} e-".format(np.std(noiseMapCM)*ADU_to_electron),
"ED: {:0.2f} e-".format(np.std(noiseMapCM_2nd)*ADU_to_electron)])
t1.add_row(["LH: {:0.2f} e-".format(np.std(noiseMap[:x//2,:])*ADU_to_electron_lower),
"LH: {:0.2f} e-".format(np.std(noiseMapCM[:x//2,:])*ADU_to_electron_lower),
"LH: {:0.2f} e-".format(np.std(noiseMapCM_2nd[:x//2,:])*ADU_to_electron_lower)])
t1.add_row(["UH: {:0.2f} e-".format(np.std(noiseMap[x//2:,:])*ADU_to_electron_upper),
"UH: {:0.2f} e-".format(np.std(noiseMapCM[x//2:,:])*ADU_to_electron_upper),
"UH: {:0.2f} e-".format(np.std(noiseMapCM_2nd[x//2:,:])*ADU_to_electron_upper)])
print(t1)
```
%% Cell type:markdown id: tags:
### Calibration Constants
%% Cell type:code id: tags:
``` python
dictionary = {}
dictionary['Offset'] = offsetMap.data
dictionary['Noise'] = noiseMapCM_2nd.data
dictionary['BadPixelsDark'] = bad_pixels.data
md = None
for const in dictionary:
metadata = ConstantMetaData()
dconst = getattr(Constants.CCD(DetectorTypes.fastCCD), const)()
dconst.data = dictionary[const]
metadata.calibration_constant = dconst
condition = Conditions.Dark.CCD(bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=det_gain,
temperature=fix_temperature,
pixels_x=1934,
pixels_y=960)
for parm in condition.parameters:
if parm.name == "Sensor Temperature":
parm.lower_deviation = temp_limits
parm.upper_deviation = temp_limits
device = Detectors.fastCCD1
metadata.detector_condition = condition
# Specifying a version for this constant:
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=device)
else:
metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
if db_output:
try:
md = send_to_db(device, dconst, condition, file_loc,
cal_db_interface, creation_time=creation_time, timeout=cal_db_timeout)
print(f"Calibration constant {const} is sent to the database.")
except Exception as e:
print(e)
if local_output:
md = save_const_to_h5(device, dconst, condition, dconst.data, file_loc, creation_time, out_folder)
print(f"Calibration constant {const} is stored locally.")
print("Creation time is: {}".format(creation_time))
print("Raw data location is: {}".format(file_loc))
print("Constants parameter conditions are:\n")
print(f"• bias_voltage: {bias_voltage}\n• integration_time: {integration_time}\n"
f"• temperature: {temperature_k}\n• gain_setting: {gain_setting}\n"
f"• in_vacuum: {in_vacuum}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
......
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 0.1
Offline Calibration for the Jungfrau Detector
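Per pixel and reported gain stage $g$, the correction applied below is $d' = (d - O_g)/G_g$, where $O_g$ is the dark offset and $G_g$ the relative gain retrieved from the calibration database; the relative gain step is skipped if no gain map is available.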
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # cluster profile to use
in_folder = "/gpfs/exfel/exp/FXE/201901/p002210/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/jf" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
run = 249 # runs to process, required
karabo_id = "FXE_XAD_JF1M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01'] # data aggregators
receiver_id = "RECEIVER-{}" # inset for receiver devices
receiver_control_id = "CONTROL" # inset for control devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # template to use for file name, double escape sequence number
h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data' # path in H5 file under which images are located
h5path_run = '/RUN/{}/DET/{}' # path to run data
h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
karabo_id_control = "" # if control is on a different ID, set to empty string if it is the same a karabo-id
karabo_da_control = "JNGFR01" # file inset for control data
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" #"tcp://max-exfl016:8015#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
bias_voltage = 180 # will be overwritten by value in file
sequences_per_node = 5 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
photon_energy = 9.2 # photon energy in keV
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
integration_time = 4.96 # integration time in us, will be overwritten by value in file
mem_cells = 0. # leave memory cells at 0; the value is read from control information starting 2019
gmapfile = "" # variable is not used but left here for back compatibility
db_module = ["Jungfrau_M233"] # ID of module in calibration database
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
chunk_size = 0
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import time
from ipyparallel import Client
from functools import partial
import tabulate
from IPython.display import display, Latex
import copy
import h5py
import os
from cal_tools.tools import (map_modules_from_folder, get_dir_creation_date,
get_constant_from_db_and_time)
from iCalibrationDB import (ConstantMetaData, Constants, Conditions, Detectors, Versions)
from cal_tools.enums import BadPixels
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import warnings
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
client = Client(profile=cluster_profile)
view = client[:]
view.use_dill()
h5path = h5path.format(karabo_id, receiver_id)
ped_dir = "{}/r{:04d}".format(in_folder, run)
if ped_dir[-1] == "/":
ped_dir = ped_dir[:-1]
if not os.path.exists(out_folder):
os.makedirs(out_folder)
elif not overwrite:
raise AttributeError("Output path exists! Exiting")
fp_name_contr = path_template.format(run, karabo_da_control, 0)
fp_path_contr = '{}/{}'.format(ped_dir, fp_name_contr)
if sequences[0] == -1:
sequences = None
print(f"Run is: {run}")
print(f"HDF5 path: {h5path}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
def check_memoryCells(file_name, path):
with h5py.File(file_name, 'r') as f:
t_stamp = np.array(f[path + '/storageCells/timestamp'])
st_cells = np.array(f[path + '/storageCells/value'])
sc_start = np.array(f[path + '/storageCellStart/value'])
valid_train = t_stamp > 0
n_scs = st_cells[valid_train][0] + 1
sc_s = sc_start[valid_train][0]
return n_scs, sc_s
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
print(f"Processing a total of {total_sequences} sequence files")
table = []
fi = 0
if total_sequences > 0: # create table
for i, key in enumerate(mapped_files):
for k, f in enumerate(list(mapped_files[key].queue)):
if k == 0:
table.append((fi, karabo_da[i], k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
# restore the queue
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
```
%% Cell type:code id: tags:
``` python
if not manual_slow_data:
with h5py.File(fp_path_contr.format(0), 'r') as f:
run_path = h5path_run.format(karabo_id_control, receiver_control_id)
integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])
control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)
try:
this_run_mcells, sc_start = check_memoryCells(fp_path_contr.format(0), control_path)
if this_run_mcells == 1:
memoryCells = 1
print(f'Dark runs in single cell mode\n storage cell start: {sc_start:02d}')
else:
memoryCells = 16
print(f'Dark runs in burst mode\n storage cell start: {sc_start:02d}')
except Exception as e:
if "Unable to open object" in str(e):
if mem_cells==0:
memoryCells = 1
else:
memoryCells = mem_cells
print(f'Set memory cells to {memoryCells} as it is not saved in control information.')
else:
print(f"Error trying to access memory cell from contol information: {e}")
print(f"Integration time is {integration_time} us")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells is {memoryCells}")
```
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(memory_cells=memoryCells,
bias_voltage=bias_voltage,
integration_time=integration_time)
def get_constant_for_module(condition, cal_db_interface, creation_time, cal_db_timeout,
memoryCells, db_module):
""" Get calibration constants for given module of Jungfrau
Function contains all includes to be used with ipCluster
:param condition: Calibration condition
:param cal_db_interface: Interface string, e.g. "tcp://max-exfl016:8015"
:param creation_time: Latest time for constant to be created
:param cal_db_timeout: Timeout for zmq request
:param memoryCells: Number of used memory cells
:param db_module: Module of Jungfrau, e.g. "Jungfrau_M035"
:return: offset_map (offset map), mask (mask of bad pixels),
gain_map (map of relative gain factors), db_module (name of DB module),
when (dictionary: constant - creation time)
"""
from iCalibrationDB import (ConstantMetaData, Constants, Conditions, Detectors, Versions)
from cal_tools.tools import get_constant_from_db_and_time
import numpy as np
when = {}
offset_map, time = \
get_constant_from_db_and_time(getattr(Detectors, db_module),
Constants.jungfrau.Offset(),
condition,
np.zeros((1024, 512, 1, 3)),
cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
when['Offset'] = time
mask, time = \
get_constant_from_db_and_time(getattr(Detectors, db_module),
Constants.jungfrau.BadPixelsDark(),
condition,
np.zeros((1024, 512, 1, 3)),
cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
when['BadPixels'] = time
gain_map, time = \
get_constant_from_db_and_time(getattr(Detectors, db_module),
Constants.jungfrau.RelativeGain(),
condition,
None,
cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
when['Gain'] = time
# move from x,y,cell,gain to cell,x,y,gain
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
if memoryCells > 1:
offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)
mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)
if gain_map is not None:
if memoryCells > 1:
gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)
else:
gain_map = np.squeeze(gain_map)
gain_map = np.moveaxis(gain_map, 1, 0)
return offset_map, mask, gain_map, db_module, when
# Retrieve Offset, BadPixels and gain constants for a Jungfrau module.
# Run ip Cluster parallelization over modules
p = partial(get_constant_for_module, condition, cal_db_interface,
creation_time, cal_db_timeout, memoryCells)
r = view.map_sync(p, db_module)
#r = list(map(p, db_module))
constants = {}
for rr in r:
offset_map, mask, gain_map, db_mod, when = rr
print(f'Constants for module {db_mod}:')
for const in when:
print(f'{const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
no_relative_gain = True
constants[db_mod] = (offset_map, mask, gain_map)
```
%% Cell type:code id: tags:
``` python
def copy_and_sanitize_non_cal_data(infile, outfile, h5base):
""" Copy and sanitize data in `infile` that is not touched by `correctLPD`
"""
if h5base.startswith("/"):
h5base = h5base[1:]
dont_copy = ["adc", ]
dont_copy = [h5base+"/{}".format(do)
for do in dont_copy]
def visitor(k, item):
if k not in dont_copy:
if isinstance(item, h5py.Group):
outfile.create_group(k)
elif isinstance(item, h5py.Dataset):
group = str(k).split("/")
group = "/".join(group[:-1])
infile.copy(k, outfile[group])
infile.visititems(visitor)
```
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_chunk(offset_map, mask, gain_map, memoryCells, no_relative_gain, inp):
import numpy as np
import copy
import h5py
fim_data = None
gim_data = None
rim_data = None
msk_data = None
err = ''
try:
d, g, m, ind, copy_sample = inp
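# Jungfrau reports the gain bit as 0, 1 or 3; remap 3 to 2 so it can index the three gain planes of the constants: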
g[g==3] = 2
if copy_sample and ind==0:
if memoryCells==1:
rim_data = np.squeeze(copy.copy(d))
else:
rim_data = np.squeeze(copy.copy(d[:,0,...]))
# Select memory cells
if memoryCells>1:
m[m>16] = 0
offset_map_cell = offset_map[m,...]
mask_cell = mask[m,...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, (offset_map_cell[...,0], offset_map_cell[...,1], offset_map_cell[...,2]))
d -= offset
# Gain correction
if not no_relative_gain:
if memoryCells>1:
gain_map_cell = gain_map[m,...]
else:
gain_map_cell = gain_map
cal = np.choose(g, (gain_map_cell[..., 0], gain_map_cell[..., 1], gain_map_cell[..., 2]))
d /= cal
msk = np.choose(g, (mask_cell[...,0], mask_cell[...,1], mask_cell[...,2]))
# Store sample of data for plotting
if copy_sample and ind==0:
if memoryCells==1:
fim_data = np.squeeze(copy.copy(d))
gim_data = np.squeeze(copy.copy(g))
msk_data = np.squeeze(copy.copy(msk))
else:
fim_data = np.squeeze(copy.copy(d[:,1,...]))
gim_data = np.squeeze(copy.copy(g[:,1,...]))
msk_data = np.squeeze(copy.copy(msk[:,1,...]))
except Exception as e:
err = e
return ind, d, msk, rim_data, fim_data, gim_data, msk_data, err
fim_data = {}
gim_data = {}
rim_data = {}
msk_data = {}
# Loop over modules
for i, key in enumerate(mapped_files):
# Loop over sequences for given module
for k, f in enumerate(list(mapped_files[key].queue)):
offset_map, mask, gain_map = constants[db_module[i]]
h5path_f = h5path.format(int(karabo_da[i][-2:]))
with h5py.File(f, 'r') as infile:
# The processed files are saved here in a folder with the run name.
out_file = "{}/{}".format(out_folder, f.split("/")[-1])
out_file = out_file.replace("RAW", "CORR")
print(f'Process file: {f}, with path {h5path_f}')
try:
with h5py.File(out_file, "w") as ofile:
copy_and_sanitize_non_cal_data(infile, ofile, h5path_f)
oshape = infile[h5path_f+"/adc"].shape
print(f'Data shape: {oshape}')
if not oshape[0]:
raise ValueError(f"No image data: shape {oshape}")
ddset = ofile.create_dataset(h5path_f+"/adc",
oshape,
chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
dtype=np.float32)
mskset = ofile.create_dataset(h5path_f+"/mask",
oshape,
chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
dtype=np.uint32,
compression="gzip", compression_opts=1, shuffle=True)
# Run ip Cluster parallelization over chunks of images
inp = []
max_ind = oshape[0]
ind = 0
# If no chunk size is given, at most 12+1 chunks are expected
if chunk_size == 0:
chunk_size = max_ind//12
print(f'Chunk size: {chunk_size}')
ts = time.time()
while ind<max_ind:
d = infile[h5path_f+"/adc"][ind:ind+chunk_size,...].astype(np.float32)
g = infile[h5path_f+"/gain"][ind:ind+chunk_size,...]
if h5path_f+"/memoryCell" in infile:
m = infile[h5path_f+"/memoryCell"][ind:ind+chunk_size,...]
else:
m = None
print(f'To process: {d.shape}')
inp.append((d,g,m, ind, k==0))
ind += chunk_size
print('Preparation time: ', time.time() - ts)
ts = time.time()
print(f'Run {len(inp)} processes')
p = partial(correct_chunk, offset_map, mask, gain_map, memoryCells, no_relative_gain)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
if k==0:
(_,_,_,
rim_data[db_module[i]], fim_data[db_module[i]],
gim_data[db_module[i]], msk_data[db_module[i]], _) = r[0]
print('Correction time: ', time.time() - ts)
ts = time.time()
for rr in r:
ind, cdata, cmask, _,_,_,_, err = rr
data_size = cdata.shape[0]
ddset[ind:ind+data_size,...] = cdata
mskset[ind:ind+data_size,...] = cmask
if err != '':
print(f'Error: {err}')
print('Saving time: ', time.time() - ts)
except Exception as e:
print(f"Error: {e}")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
for mod in rim_data:
h, ex, ey = np.histogram2d(rim_data[mod].flatten(), gim_data[mod].flatten(),
bins=[100, 4], range=[[0, 10000], [0,4]])
do_2d_plot(h, (ex, ey), "Signal (ADU)", "Gain Bit Value", f'Module {mod}')
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per pixel mean of the sequence file of RAW data
%% Cell type:code id: tags:
``` python
for mod in rim_data:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(rim_data[mod],axis=0),
vmin=min(0.75*np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
vmax=max(1.5*np.median(rim_data[mod][rim_data[mod] > 0]), 16000), cmap="jet")
ax.set_title(f'Module {mod}')
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per pixel mean of the sequence file of CORR data
%% Cell type:code id: tags:
``` python
for mod in rim_data:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(fim_data[mod],axis=0),
vmin=min(0.75*np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
vmax=max(2.*np.median(fim_data[mod][fim_data[mod] > 0]), 100), cmap="jet")
ax.set_title(f'Module {mod}', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Train Preview ###
A single image from the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(fim_data[mod][0,...],
vmin=min(0.75*np.median(fim_data[mod][0,...]), -0.5),
vmax=max(2.*np.median(fim_data[mod][0,...]), 100), cmap="jet")
ax.set_title(f'Module {mod}', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for mod in rim_data:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(211)
h = ax.hist(fim_data[mod].flatten(), bins=1000, range=(-100, 1000), log=True)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod}')
ax = fig.add_subplot(212)
h = ax.hist(fim_data[mod].flatten(), bins=1000, range=(-1000, 10000), log=True)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod}')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per pixel maximum of the first train of the GAIN data
%% Cell type:code id: tags:
``` python
for mod in rim_data:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.max(gim_data[mod], axis=0), vmin=0,
vmax=3, cmap="jet")
ax.set_title(f'Module {mod}', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells, as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
```
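%% Cell type:markdown id: tags:
Because the entries are bit flags, several bad pixel types can be set for the same pixel at once. A minimal sketch of decoding a combined mask value back into its constituent flags (a hypothetical example value, using the same BadPixels enum as above):
%% Cell type:code id: tags:
``` python
value = BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value
flags = [item.name for item in BadPixels if value & item.value]
print(flags) # ['OFFSET_OUT_OF_THRESHOLD', 'NOISE_OUT_OF_THRESHOLD']
```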
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.log2(msk_data[mod][0,...]), vmin=0, vmax=32, cmap="jet")
ax.set_title(f'Module {mod}', size=18)
cb = fig.colorbar(im, ax=ax)
```
......