Commits on Source (8)
......@@ -58,10 +58,10 @@ def get_acq_rate(fast_paths: Tuple[str, str, int],
with h5py.File(slow_data_file, "r") as fin:
if slow_data_path in fin:
# The acquisition rate value is stored in a 1D array of type
# float. Use the 3rd value, arbitrarily chosen. It's okay to
# loose precision here because the usage is about defining the
# rate for meta-data.
return round(fin[slow_data_path][3], 1)
# float. Use the 3rd value, arbitrarily chosen.
# Losing precision here is deliberate: the value is only used
# for bucketing the rate when managing meta-data.
return round(float(fin[slow_data_path][3]), 1)
# Compute acquisition rate from fast data
fast_data_file, karabo_id, module = fast_paths
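A tiny illustration of the bucketing described in the comment above (the rates are hypothetical):

``` python
# Rounding to one decimal collapses slightly varying readouts into a
# single meta-data bucket, e.g. 4.5003 and 4.4998 both report as 4.5.
round(float(4.5003), 1)  # -> 4.5
round(float(4.4998), 1)  # -> 4.5
```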
......@@ -209,6 +209,8 @@ class AgipdCorrections:
self.baseline_corr_noise_threshold = -1000
self.snow_resolution = SnowResolution.INTERPOLATE
self.cm_dark_fraction = 0.66
self.cm_dark_min = -25.
self.cm_dark_max = 25.
self.cm_n_itr = 4
self.mg_hard_threshold = 100
self.hg_hard_threshold = 100
......@@ -271,7 +273,7 @@ class AgipdCorrections:
f = h5py.File(file_name, 'r')
group = f[data_path]
valid, first_index, last_index, valid_trains, valid_indices = \
_, first_index, last_index, __, valid_indices = \
self.get_valid_image_idx(idx_base, f)
firange = self.gen_valid_range(first_index, last_index,
self.max_cells, agipd_base, f,
......@@ -342,7 +344,8 @@ class AgipdCorrections:
arr = data_dict[field][:n_img]
kw = {'fletcher32': True}
if field in compress_fields:
kw.update(compression='gzip', compression_opts=1, shuffle=True)
kw.update(compression='gzip', compression_opts=1,
shuffle=True)
if arr.ndim > 1:
kw['chunks'] = (1,) + arr.shape[1:] # 1 chunk = 1 image
......@@ -364,17 +367,18 @@ class AgipdCorrections:
if not self.corr_bools.get("common_mode"):
return
dark_min = self.cm_dark_min
dark_max = self.cm_dark_max
fraction = self.cm_dark_fraction
n_itr = self.cm_n_itr
cell_id = self.shared_dict[i_proc]['cellId']
train_id = self.shared_dict[i_proc]['trainId']
n_img = self.shared_dict[i_proc]['nImg'][0]
cell_ids = cell_id[train_id == train_id[0]]
n_cells = cell_ids.size
data = self.shared_dict[i_proc]['data'][:n_img].reshape(-1, n_cells, 8,
64, 2, 64)
data = self.shared_dict[i_proc]['data'][:n_img].reshape(-1, n_cells,
8, 64, 2, 64)
# Loop over iterations
for i in range(n_itr):
......@@ -388,7 +392,8 @@ class AgipdCorrections:
# Cell common mode
cell_cm_sum, cell_cm_count = \
calgs.sum_and_count_in_range_cell(asic_data, -25., 25.)
calgs.sum_and_count_in_range_cell(asic_data, dark_min,
dark_max)
cell_cm = cell_cm_sum / cell_cm_count
cell_cm[cell_cm_count < fraction * 32 * 256] = 0
......@@ -396,7 +401,8 @@ class AgipdCorrections:
# Asics common mode
asic_cm_sum, asic_cm_count = \
calgs.sum_and_count_in_range_asic(asic_data, -25., 25.)
calgs.sum_and_count_in_range_asic(asic_data, dark_min,
dark_max)
asic_cm = asic_cm_sum / asic_cm_count
asic_cm[asic_cm_count < fraction * 64 * 64] = 0
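A minimal numpy sketch of the per-ASIC step above, assuming an `asic_data` array of shape `(n_images, 64, 64)`; the production code uses the compiled `calgs.sum_and_count_in_range_asic` helper instead:

``` python
import numpy as np

def asic_common_mode(asic_data, dark_min=-25., dark_max=25., fraction=0.66):
    """Subtract the mean of near-zero ('dark') pixels from each ASIC image."""
    # Pixels inside the dark window define the common-mode estimate
    dark = (asic_data > dark_min) & (asic_data < dark_max)
    count = dark.sum(axis=(-2, -1))
    total = np.where(dark, asic_data, 0.).sum(axis=(-2, -1))
    cm = np.divide(total, count, out=np.zeros_like(total), where=count > 0)
    # Skip the correction when too few pixels are dark
    cm[count < fraction * 64 * 64] = 0.
    return asic_data - cm[..., np.newaxis, np.newaxis]
```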
......@@ -513,6 +519,7 @@ class AgipdCorrections:
# if not we continue with initial data
else:
dd = data[i]
sh = 0
# if we have enough pixels in medium or low gain and
# correction via hist matching is requested to this now
......@@ -560,9 +567,12 @@ class AgipdCorrections:
data[gain == 1] += mgbc[gain == 1]
del mgbc
# Do xray correction if requested
# Do xray correction if requested
# The slopes we have in our constants are already relative
# slopeFF = slopeFFpix/average(slopeFFpix)
# To apply them we have to divide (/), not multiply (*)
if self.corr_bools.get("xray_corr"):
data *= self.xray_cor[module_idx]
data /= self.xray_cor[module_idx]
if self.corr_bools.get('melt_snow'):
melt_snowy_pixels(raw_data, data, gain, rgain,
......@@ -751,8 +761,8 @@ class AgipdCorrections:
uq, fidxv, cntsv = np.unique(trains, return_index=True,
return_counts=True)
# Validate calculated CORR INDEX contents by checking difference between
# trainId stored in RAW data and trains from
# Validate calculated CORR INDEX contents by checking the
# difference between trainId stored in RAW data and the unique
# trains derived above
train_diff = np.isin(np.array(infile["/INDEX/trainId"]), uq,
invert=True)
......@@ -837,43 +847,38 @@ class AgipdCorrections:
to medium gain is used, as no reliable CI data for all memory cells
exists for the current AGIPD instances.
Relative gain is derived both from pulse capacitor as well as flat
field data:
* from the pulse capacitor data we get the relative slopes of a
given pixel's memory cells with respect to all memory cells of that
pixel:
Relative gain is derived from pulse capacitor as well as low
intensity flat field data. Information from the flat field data is
needed to 'calibrate' the pulse capacitor data; if no FF data is
available, the relative gain for the High Gain stage is set to 1:
rpch = m_h / median(m_h)
* Relative gain for the High gain stage - from the FF data we get
the relative slopes of a given pixel and memory cell with
respect to all memory cells and all pixels in the module.
Please note: the current slopesFF available in the calibration
constants are created per pixel only, not per memory cell:
where m_h is the high gain slope m of each memory cell of the pixel.
rel_high_gain = 1 if only PC data is available
rel_high_gain = rel_slopesFF if FF data is also available
* we also derive the factor between high and medium gain in a
similar way and scale it to be relative to the pixels memory cells:
fpc = m_m/m_h
rfpc = fpc/ median(fpc)
* Relative gain for Medium gain stage: we derive the factor
between high and medium gain using slope information from
fits to the linear part of high and medium gain:
where m_m is the medium gain slope of all memory cells of a given
pixel and m_h is the high gain slope as above
rfpc_high_medium = m_h/m_m
* finally, we get the relative X-ray gain of all memory cells for a
given pixel from flat field data:
ff = median(m_ff)
ff /= median(ff)
where m_ff is the flat field derived (high gain) slope of all
memory cells of a given pixel. The pixel values are then scaled to
the complete module_idx. Note that the first 32 memory cells are known
to exhibit differing behaviour and are skipped if possible.
where m_m is the medium gain slope of a given memory cell and
pixel and m_h is the high gain slope as above
rel_gain_medium = rel_high_gain * rfpc_high_medium
With this data the relative gain for the three gain stages evaluates
to:
high gain = ff * rpch
medium gain = ff * rfpc
low gain = medium gain / 4.48
rel_high gain = 1 or rel_slopesFF
rel_medium gain = rel_high_gain * rfpc_high_medium
rel_low gain = rel_medium gain * 4.48
:param cons_data: A dictionary for each retrieved constant value.
:param module_idx: A module index
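Gathering the relations above into one short sketch (the names follow the code below; this is illustrative, not the implementation):

``` python
# rel_slopesFF: per-pixel FF slopes relative to the module average
rel_high_gain = 1.0  # or rel_slopesFF when FF data is available
rfpc_high_medium = pc_high_m / pc_med_m   # HG/MG ratio from PC fits
rel_medium_gain = rel_high_gain * rfpc_high_medium
rel_low_gain = rel_medium_gain * 4.48     # fixed MG/LG factor
```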
......@@ -910,8 +915,16 @@ class AgipdCorrections:
else:
xray_cor = np.squeeze(slopesFF[..., 0])
# relative X-ray correction is normalized by the median of all pixels
xray_cor /= np.nanmedian(xray_cor)
# relative X-ray correction is normalized by the median
# of all pixels
# TODO: A check is required to understand why it was divided by
# the median again. If we have relative slopes in the constants
# (and we have!), then
# xray_cor = (slopeFF/average_slopeFF)/average_slopeFF.
# That did not make sense and was removed.
# xray_cor /= np.nanmedian(xray_cor)
self.xray_cor[module_idx][...] = xray_cor.transpose()[...]
# add additional bad pixel information
......@@ -921,7 +934,7 @@ class AgipdCorrections:
slopesPC = cons_data["SlopesPC"].astype(np.float32)
# this will handle some historical data in a different format
# This will handle some historical data in a different format
# constant dimension injected first
if slopesPC.shape[0] == 10 or slopesPC.shape[0] == 11:
slopesPC = np.moveaxis(slopesPC, 0, 3)
......@@ -936,23 +949,38 @@ class AgipdCorrections:
pc_med_m = slopesPC[..., :self.max_cells, 3]
pc_med_l = slopesPC[..., :self.max_cells, 4]
# calculate ratio high to medium gain over memory cells
# calculate ratio high to medium gain
pc_high_ave = np.nanmean(pc_high_m, axis=(0,1))
pc_med_ave = np.nanmean(pc_med_m, axis=(0,1))
# ratio between HG and MG per pixel per mem cell
# used for rel gain calculation
frac_high_med_pix = pc_high_m / pc_med_m
# average ratio between HG and MG as a function of
# mem cell (needed for bls_stripes)
# TODO: a per-pixel correction would be more accurate
frac_high_med = pc_high_ave / pc_med_ave
# calculate additional medium-gain offset
md_additional_offset = pc_high_l - pc_med_l * pc_high_m / pc_med_m
# Calculate relative gain
rel_gain[..., 0] = pc_high_m / pc_high_ave
rel_gain[..., 1] = pc_med_m / pc_med_ave * frac_high_med
# Calculate relative gain. If FF constants are available,
# they are used for high gain;
# if not, rel_gain is calculated using PC data only.
# if self.corr_bools.get("xray_corr"):
# rel_gain[..., :self.max_cells, 0] /= xray_corr
# PC data should be 'calibrated' with X-ray data;
# if that is not done, it is better to use 1 instead of biasing
# the results with PC artefacts.
# rel_gain[..., 0] = 1./(pc_high_m / pc_high_ave)
# High-gain (rel_gain[..., 0]) stays the same
rel_gain[..., 1] = rel_gain[..., 0] * frac_high_med_pix
rel_gain[..., 2] = rel_gain[..., 1] * 4.48
self.md_additional_offset[module_idx][...] = md_additional_offset.transpose()[...] # noqa
self.rel_gain[module_idx][...] = rel_gain[...].transpose()
self.frac_high_med[module_idx][...] = frac_high_med
self.mask[module_idx][...] = bpixels.transpose()[...]
return
......
......@@ -141,7 +141,7 @@ def baseline_correct_via_stripe(d, g, m, frac_high_med):
if len(idx) < 3:
return d, 0
shift = np.nanmean(dd[idx, :])
shift = np.nanmedian(dd[idx, :])
d[g == 0] -= shift
d[g == 1] -= shift / frac_high_med
return d, shift
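The switch from `nanmean` to `nanmedian` makes the stripe shift estimate robust to outliers; a small illustration with hypothetical values:

``` python
import numpy as np

# A handful of hot pixels skews the mean but not the median
stripe = np.array([1.0, 0.8, 1.2, 0.9, 250.0, np.nan])
np.nanmean(stripe)    # ~50.8, dominated by the single hot pixel
np.nanmedian(stripe)  # 1.0, a robust baseline-shift estimate
```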
......
......@@ -2,8 +2,9 @@ from collections import OrderedDict
import datetime
from glob import glob
import json
from os import environ, listdir, path, stat
from os.path import isfile, splitext
from os import environ, listdir, path
from os.path import isfile
from pathlib import Path
from queue import Queue
import re
from time import sleep
......@@ -11,6 +12,7 @@ from typing import Optional
from urllib.parse import urljoin
import dateutil.parser
import h5py
import ipykernel
from metadata_client.metadata_client import MetadataClient
from notebook.notebookapp import list_running_servers
......@@ -229,47 +231,57 @@ def get_run_info(proposal, run):
def get_dir_creation_date(directory: str, run: int,
tsdir: Optional[bool] = False,
verbosity: Optional[int] = 0):
verbosity: Optional[int] = 0) -> datetime.datetime:
"""
Return run starting time from the MDC.
If not succeeded, return modification time of oldest file.h5
in [directory]/[run]04.
Return run start time from MyMDC.
If not available from MyMDC, retrieve the data from the dataset's metadata
in [directory]/[run] or, if the dataset is older than 2020, from the
directory's creation time.
If the data is not available from either source, this function will raise a
ValueError.
:param directory: path to directory which contains runs
:param run: run number
:param tsdir: to get modification time of [directory]/[run]04.
:param verbosity: Level of verbosity (0 - silent)
:return: (datetime) run creation time
"""
directory = Path(directory)
proposal = int(directory.parent.name[1:])
try:
path_list = list(filter(None, directory.strip('/').split('/')))
proposal = int(path_list[-2][1:])
run_info = get_run_info(proposal, run)
return dateutil.parser.parse(run_info['begin_at'])
except Exception as e:
if verbosity > 0:
print(e)
directory = directory / 'r{:04d}'.format(run)
# Loop a number of times to catch stale file handle errors, due to
# migration or gpfs sync.
ntries = 100
while ntries > 0:
try:
if tsdir:
creation_time = stat("{}/r{:04d}".format(directory, run)).st_mtime
else:
rfiles = glob("{}/r{:04d}/*.h5".format(directory, run))
rfiles.sort(key=path.getmtime)
creation_time = stat(rfiles[0]).st_mtime
creation_time = datetime.datetime.fromtimestamp(creation_time)
return creation_time
except: # catch stale file handle errors etc and try again
dates = []
for f in directory.glob('*.h5'):
with h5py.File(f, 'r') as fin:
cdate = fin['METADATA/creationDate'][0].decode()
cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
dates.append(cdate)
return min(dates)
except (IOError, ValueError):
ntries -= 1
except KeyError: # The files are here, but it's an older dataset
return datetime.datetime.fromtimestamp(directory.stat().st_ctime)
msg = 'Could not get the creation time from the directory'
raise ValueError(msg, directory)
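A usage sketch of the fallback chain (path, run and expected timestamp mirror the test added in this MR):

``` python
from cal_tools.tools import get_dir_creation_date

# MyMDC is queried first; on failure the METADATA/creationDate entry of
# the run's h5 files is read, and for older data the directory ctime is used.
date = get_dir_creation_date('/gpfs/exfel/exp/DETLAB/202031/p900172/raw', 10)
print(date)  # 2020-07-20 10:39:03
```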
def save_const_to_h5(device, constant, condition, data,
file_loc, creation_time, out_folder):
file_loc, creation_time, out_folder):
"""
Save constant in h5 file with its metadata
(e.g. db_module, condition, creation_time)
......@@ -280,7 +292,7 @@ def save_const_to_h5(device, constant, condition, data,
:type constant: iCalibrationDB.know_constants object
:param condition: Calibration condition
:type condition: iCalibrationDB.know_detector_conditions object
:param data: Constant data to save
:param data: Constant data to save
:type data: ndarray
:param file_loc: Location of raw data "proposal:{} runs:{} {} {}"
:type file_loc: str
......
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/exp/MID/202030/p900137/scratch/karnem/r449_v06" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 449 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregator names; default ['-1'] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation date of the input directory for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 500, 1] # range list [start, end, step] of maximum pulse indices within a train; at most 3 list elements allowed
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity no baseline correction via noise is attempted
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
cm_dark_fraction = 0.66 # fraction of dark pixels above which a module is considered dark enough for CM correction
cm_dark_range = [-50., 30] # signal range in ADU for a pixel to be considered dark
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # relative noise threshold above which a complete ADC is masked
cm_dark_fraction = 0.66 # threshold for empty pixels to consider module enough dark to perform CM correction
cm_n_itr = 4 # number of iterations for common mode correction
# Correction Booleans
only_offset = False # apply only Offset correction; if False, Offset is applied by default along with the other selected corrections, if True, only Offset is applied
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop-offs on ASICs are corrected
force_hg_if_below = True # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = True # set medium gain if mg offset subtracted value is below mg_hard_threshold
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # mask an entire ADC if it is noisy above a relative threshold
common_mode = True # Common mode correction
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = True # 5% separation in thresholding between low and medium gain
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
# Parallelization parameters
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
chunk_size = 1000 # size of chunk for image-wise correction
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import copy
from datetime import timedelta
from dateutil import parser
import gc
import glob
import itertools
from IPython.display import HTML, display, Markdown, Latex
import math
from multiprocessing import Pool
import os
import re
import sys
import traceback
from time import time, sleep, perf_counter
import tabulate
import warnings
warnings.filterwarnings('ignore')
import yaml
from extra_geom import AGIPD_1MGeometry
from extra_data import RunDirectory, stack_detector_data
from iCalibrationDB import Detectors
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from matplotlib.colors import LogNorm
from matplotlib import cm as colormap
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
from cal_tools.agipdlib import (AgipdCorrections, get_num_cells, get_acq_rate, get_gain_setting)
from cal_tools.agipdlib import (AgipdCorrections, get_acq_rate,
get_gain_setting, get_num_cells)
from cal_tools.cython import agipdalgs as calgs
from cal_tools.ana_tools import get_range
from cal_tools.enums import BadPixels
from cal_tools.tools import get_dir_creation_date, map_modules_from_folder
from cal_tools.step_timing import StepTimer
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Don't apply any other corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
```
%% Cell type:code id: tags:
``` python
if in_folder[-1] == "/":
in_folder = in_folder[:-1]
if sequences[0] == -1:
sequences = None
control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
print(f'Path to control file {control_fname}')
```
%% Cell type:code id: tags:
``` python
# Create output folder
os.makedirs(out_folder, exist_ok=overwrite)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
else:
dinstance = "AGIPD1M2"
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
def mod_name(modno):
return f"Q{modno // 4 + 1}M{modno % 4 + 1}"
print("Process modules: ", ', '.join(
[mod_name(x) for x in modules]))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Display Information about the selected pulses indices for correction.
pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses
try:
if len(pulses_lst) > 1:
print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
.format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
pulses_lst[1] - pulses_lst[0]))
else:
print("one pulse is selected: a pulse of idx {}".format(pulses_lst[0]))
except Exception as e:
raise ValueError('max_pulses input Error: {}'.format(e))
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template,
karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
filename = file_list[0]
channel = int(re.findall(r".*-AGIPD([0-9]+)-.*", filename)[0])
# Evaluate number of memory cells
mem_cells = get_num_cells(filename, karabo_id, channel)
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
max_cells = mem_cells if max_cells == 0 else max_cells
# Evaluate acquisition rate
if acq_rate == 0:
acq_rate = get_acq_rate((filename, karabo_id, channel))
else:
acq_rate = None
print(f"Maximum memory cells to calibrate: {max_cells}")
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour,
minutes=offset.minute, seconds=offset.second)
creation_time += delta
# Evaluate gain setting
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Set gain settion to 0")
gain_setting = 0
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells_db}\n"
f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Photon Energy: {photon_energy}\n")
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(max_cells, max_pulses,
h5_data_path=h5path,
h5_index_path=h5path_idx,
corr_bools=corr_bools)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
const_yaml = None
if os.path.isfile(f'{out_folder}/retrieved_constants.yml'):
with open(f'{out_folder}/retrieved_constants.yml', "r") as f:
const_yaml = yaml.load(f.read(), Loader=yaml.FullLoader)
# retrieve constants
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
device = getattr(getattr(Detectors, dinstance), mod_name(mod))
err = ''
try:
# check if there is a yaml file in out_folder that has the device constants.
if const_yaml and device.device_name in const_yaml:
when = agipd_corr.initialize_from_yaml(const_yaml, mod, device)
else:
when = agipd_corr.initialize_from_db(cal_db_interface, creation_time, mem_cells_db, bias_voltage,
photon_energy, gain_setting, acq_rate, mod, device, False)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, device.device_name
ts = perf_counter()
with Pool(processes=16) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = max_cells*256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
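%% Cell type:markdown id: tags:
A quick illustration of `batches` (hypothetical file names, shown only to document the helper):
%% Cell type:code id: tags:
``` python
# Five files in batches of two -> the last batch is shorter
list(batches(['f0', 'f1', 'f2', 'f3', 'f4'], 2))
# -> [['f0', 'f1'], ['f2', 'f3'], ['f4']]
```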
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
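%% Cell type:markdown id: tags:
The integer arithmetic above spreads images evenly instead of leaving one small remainder chunk; a sketch with a hypothetical image count:
%% Cell type:code id: tags:
``` python
# One file slot holding 2500 images with chunk_size = 1000 gives three
# near-equal chunks: (0, 0, 833), (0, 833, 1666), (0, 1666, 2500)
list(imagewise_chunks([2500]))
```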
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
with Pool() as pool:
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files:")
for file_name in file_batch:
print(" ", file_name)
step_timer.start()
img_counts = pool.starmap(agipd_corr.read_file, enumerate(file_batch))
step_timer.done_step('Load')
# Evaluate zero-data-std mask
pool.starmap(agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
))
step_timer.done_step('Mask 0 std')
# Perform image-wise correction
pool.starmap(agipd_corr.correct_agipd, imagewise_chunks(img_counts))
step_timer.done_step("Image-wise correction")
# Perform cross-file correction parallel over asics
pool.starmap(agipd_corr.cm_correction, itertools.product(
range(len(file_batch)), range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, os.path.join(out_folder, os.path.basename(file_name).replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# If there is a yml file, a preceding notebook has already been processed
# and the report will be generated from it.
fst_print = True
to_store = []
line = []
for i, (error, modno, when, mod_dev) in enumerate(const_out):
qm = mod_name(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
if not const_yaml or mod_dev not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
line = [qm]
# If the correction did not crash
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store a few timestamps if they exist
# Add NA to keep the array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
line.append(when[key])
else:
if error is not None:
line.append('Err')
else:
line.append('NA')
if len(line) > 0:
to_store.append(line)
seq = sequences[0] if sequences else 0
if len(to_store) > 0:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fyml:
yaml.safe_dump({"time-summary": {f"S{seq}":to_store}}, fyml)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(run_folder, source, include, tid=None, path='*/DET/*'):
"""
Load a single train for all modules
:param run_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param tid: Train ID to be loaded; the first train is used if None is given
:param path: Path to find image data inside the h5 file
"""
run_data = RunDirectory(run_folder, include)
if tid:
tid, data = run_data.select('*/DET/*', source).train_from_id(tid)
return tid, stack_detector_data(data, source)
else:
for tid, data in run_data.select('*/DET/*', source).trains(require_all=True):
return tid, stack_detector_data(data, source)
return None, None
```
%% Cell type:code id: tags:
``` python
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(f'{out_folder}/', 'image.data', include)
_, gains = get_trains_data(f'{out_folder}/', 'image.gain', include, tid)
_, mask = get_trains_data(f'{out_folder}/', 'image.mask', include, tid)
_, blshift = get_trains_data(f'{out_folder}/', 'image.blShift', include, tid)
_, cellId = get_trains_data(f'{out_folder}/', 'image.cellId', include, tid)
_, pulseId = get_trains_data(f'{out_folder}/', 'image.pulseId', include, tid)
_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, tid)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows signal vs. digitized gain
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
display(Markdown(f'Mean over images of the RAW data\n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[:, 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {np.max(cellId[cell_id_preview])} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_id_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_id_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
display(Markdown(f'A single shot image from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 7)
vmin, vmax = get_range(corrected[cell_id_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(corrected[cell_id_preview].flatten(), bins=1000, range=(-50, 2000), log=True)
vmin, vmax = get_range(corrected[cell_id_preview], 5, -50)
nbins = np.int((vmax + 50) / 2)
h = ax.hist(corrected[cell_id_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
_ = plt.xlabel('[ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across one train \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(corrected.flatten(), bins=1200,
range=(-200, 20000), histtype='step', log=True, label = 'All')
_ = ax.hist(corrected[gains == 0].flatten(), bins=1200, range=(-200, 20000),
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = np.int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
_ = ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
_ = ax.hist(corrected[gains == 1].flatten(), bins=1200, range=(-200, 20000),
_ = ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
_ = ax.hist(corrected[gains == 2].flatten(), bins=1200,
range=(-200, 20000), alpha=0.5, log=True, label='Low gain', color='yellow')
_ = ax.hist(corrected[gains == 2].flatten(), bins=nbins,
range=(-100, vmax), alpha=0.5, log=True, label='Low gain', color='yellow')
_ = ax.legend()
_ = ax.grid()
_ = plt.xlabel('[ADU]')
_ = plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
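%% Cell type:markdown id: tags:
A single entry can combine several types; a minimal sketch decoding one sample entry with the enum above:
%% Cell type:code id: tags:
``` python
# Each entry is a bit field, so a pixel may carry several bad-pixel
# types at once; list the names whose bit is set in one sample entry.
entry = int(mask[cell_id_preview][0, 0, 0])
flagged = [item.name for item in BadPixels if entry & item.value]
print(f"Entry {entry:#034b} flags: {flagged}")
```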
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.mean(mask>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
from datetime import datetime
from pathlib import Path
import pytest
from cal_tools.tools import get_dir_creation_date
def test_dir_creation_date():
folder = '/gpfs/exfel/exp/DETLAB/202031/p900172/raw'
date = get_dir_creation_date(folder, 10)
assert isinstance(date, datetime)
assert str(date) == '2020-07-20 10:39:03'
with pytest.raises(ValueError) as e:
get_dir_creation_date(folder, 4)
assert e.value.args[1] == Path(folder) / 'r0004'
# The following data predates the addition of creation_time in metadata
folder = '/gpfs/exfel/exp/SQS/201930/p900075/raw/'
date = get_dir_creation_date(folder, 365)
assert isinstance(date, datetime)
assert str(date) == '2019-07-04 11:02:41.280000'
......@@ -186,8 +186,8 @@ def make_timing_summary(run_path, joblist, request_time, submission_time):
time_table=time_table.split('\n'))))
def make_report(run_path, tmp_path, out_path, project, author, version,
report_to):
def make_report(run_path: str, tmp_path: str, out_path: str, project: str,
author: str, version: str, report_to: str):
"""
Create calibration report (pdf file)
......@@ -201,12 +201,13 @@ def make_report(run_path, tmp_path, out_path, project, author, version,
:param project: Project title
:param author: Author of the notebook
:param version: Version of the notebook
:param report_to: Name or path of the report file
:param report_to: Report path ending with the report name
"""
run_path = path.abspath(run_path)
report_path, report_name = path.split(report_to)
if report_path != '':
out_path = report_path
if not report_path:
report_path = out_path
try:
check_call([sys.executable, "-m", "sphinx.cmd.quickstart",
......@@ -299,16 +300,18 @@ def make_report(run_path, tmp_path, out_path, project, author, version,
# finally call the make scripts
chdir(run_path)
try:
check_call(["make", f"SPHINXBUILD={sys.executable} -m sphinx", "latexpdf"])
check_call(["make", f"SPHINXBUILD={sys.executable} -m sphinx",
"latexpdf"])
except CalledProcessError:
print("Failed to make pdf documentation")
print("Temp files will not be deleted and " +
"can be inspected at: {}".format(run_path))
print("Temp files will not be deleted and "
f"can be inspected at: {run_path}")
return
print("Moving report to final location: {}".format(out_path))
makedirs(out_path, exist_ok=True)
copy('{}/_build/latex/{}.pdf'.format(run_path, report_name), out_path)
print(f"Moving report to final location: {report_path}")
makedirs(report_path, exist_ok=True)
copy(f'{run_path}/_build/latex/{report_name}.pdf', report_path)
temp_dirs = glob(f'{tmp_path}/*/')
# Remove folders with figures and sphinx files.
......@@ -318,15 +321,15 @@ def make_report(run_path, tmp_path, out_path, project, author, version,
# Archiving files in slurm_tmp
if os.path.isfile(f'{out_path}/retrieved_constants.yml'):
move(f'{out_path}/retrieved_constants.yml',
copy(f'{out_path}/retrieved_constants.yml',
f"{tmp_path}")
# Moving temporary files to out-folder after successful execution
# This helps in keeping elements needed for reproducibility.
print(f"Moving temporary files to final location"
f": {out_path}/{os.path.basename(tmp_path)} with name: "
f": {report_path}/{os.path.basename(tmp_path)} with name: "
f"slurm_out_{report_name}")
move(tmp_path, f"{out_path}/slurm_out_{report_name}")
move(tmp_path, f"{report_path}/slurm_out_{report_name}")
def make_titlepage(sphinx_path, project, data_path, version):
......