Compare revisions — calibration/pycalibration

Commits on Source (4)
Showing with 2850 additions and 1321 deletions
@@ -624,7 +624,7 @@ class AgipdCorrections:
         # slopeFF = slopeFFpix/average(slopeFFpix)
         # To apply them we have to / not *
         if self.corr_bools.get("xray_corr"):
-            data /= self.xray_cor[module_idx]
+            data /= self.xray_cor[module_idx][cellid, ...]

         # use sharedmem raw_data and t0_rgain
         # after calculating it while offset correcting.
@@ -818,7 +818,7 @@ class AgipdCorrections:
         uq, fidxv, cntsv = np.unique(trains, return_index=True,
                                      return_counts=True)

         # Validate calculated CORR INDEX contents by checking
         # difference between trainId stored in RAW data and trains from
         train_diff = np.isin(np.array(infile["/INDEX/trainId"]), uq,
                              invert=True)
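The check above can be reproduced in isolation; a minimal sketch with hypothetical arrays standing in for the RAW `/INDEX/trainId` dataset and the train IDs recorded with each corrected image:

``` python
import numpy as np

# Hypothetical stand-ins for /INDEX/trainId in RAW and the per-image trains
raw_train_ids = np.array([1001, 1002, 1003, 1004])
image_trains = np.array([1001, 1001, 1002, 1004, 1004])

# Unique trains in the corrected data, with first index and counts
uq, fidxv, cntsv = np.unique(image_trains, return_index=True,
                             return_counts=True)

# Trains present in RAW but missing from the corrected data
train_diff = np.isin(raw_train_ids, uq, invert=True)
print(raw_train_ids[train_diff])  # [1003]
```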
@@ -905,12 +905,12 @@ class AgipdCorrections:
         exists of the current AGIPD instances.

         Relative gain is derived both from pulse capacitor as well as low
         intensity flat field data; information from flat field data is
         needed to 'calibrate' pulse capacitor data. If there is no
         available FF data, relative gain for the High Gain stage is set to 1:

         * Relative gain for High gain stage - from the FF data we get
           the relative slopes of a given pixel and memory cells with
           respect to all memory cells and all pixels in the module.
           Please note: current slopesFF available in calibration
           constants are created per pixel only, not per memory cell:
@@ -923,9 +923,9 @@ class AgipdCorrections:
           between high and medium gain using slope information from
           fits to the linear part of high and medium gain:

              rfpc_high_medium = m_h/m_m

           where m_m is the medium gain slope of a given memory cell
           and pixel, and m_h is the high gain slope as above

              rel_gain_medium = rel_high_gain * rfpc_high_medium
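A small numeric sketch of the formulas above (illustrative values only, not real calibration constants):

``` python
# Illustrative numbers only
rel_high_gain = 1.05   # relative FF slope of this pixel/cell (slopeFF/median)
m_h, m_m = 40.0, 1.2   # PC high- and medium-gain slopes for the same pixel/cell

rfpc_high_medium = m_h / m_m                        # ~33.3
rel_gain_medium = rel_high_gain * rfpc_high_medium  # 35.0
```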
@@ -954,32 +954,16 @@ class AgipdCorrections:
         if self.corr_bools.get("xray_corr"):
             bpixels |= cons_data["BadPixelsFF"].astype(np.uint32)[..., :bpixels.shape[2], None]  # noqa

-            slopesFF = np.squeeze(cons_data["SlopesFF"])
+            slopesFF = cons_data["SlopesFF"]
             if len(slopesFF.shape) == 4:
                 slopesFF = slopesFF[..., 0]

-            # first 32 cells are known to behave differently, so we avoid
-            # them if we can when calculating the mean X-ray derived gain
-            # slope for each pixel
-            if slopesFF.shape[2] > 32:
-                xray_cor = np.nanmedian(
-                    slopesFF[..., 32:min(96, self.max_cells)], axis=2)
-            elif slopesFF.shape[2] > 2:
-                xray_cor = np.nanmedian(
-                    slopesFF[..., :min(96, self.max_cells)], axis=2)
-            else:
-                xray_cor = np.squeeze(slopesFF[..., 0])
+            # Memory-cell-resolved xray_cor correction
+            xray_cor = slopesFF  # (128, 512, mem_cells)

             # relative X-ray correction is normalized by the median
             # of all pixels
-            # TODO: A check is required to know why it is again divided by
-            # median. If we have relative slopes in the constants
-            # (and we have!)
-            # xray_cor = (slopeFF/average_slopeFF)/average_slopeFF.
-            # It did not make sense and was removed.
-            # xray_cor /= np.nanmedian(xray_cor)
+            xray_cor /= np.nanmedian(xray_cor)

             self.xray_cor[module_idx][...] = xray_cor.transpose()[...]
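The net effect of this hunk: instead of collapsing SlopesFF to one value per pixel via a median over cells, the correction is kept memory-cell resolved and normalized once by the global median. A minimal sketch of the new path, assuming SlopesFF shaped (128, 512, mem_cells):

``` python
import numpy as np

mem_cells = 202
# Illustrative stand-in for the SlopesFF constant: (128, 512, mem_cells)
slopesFF = np.random.uniform(0.9, 1.1, (128, 512, mem_cells)).astype(np.float32)

# New path: keep the correction memory-cell resolved, normalize once
# by the median over all pixels and cells
xray_cor = slopesFF / np.nanmedian(slopesFF)

# Stored transposed, so one cell's map can be applied per image
xray_cor_t = xray_cor.transpose()          # (mem_cells, 512, 128)
cellid = 17
data = np.random.uniform(0, 1000, (512, 128)).astype(np.float32)
data /= xray_cor_t[cellid, ...]
```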
@@ -1041,13 +1025,19 @@ class AgipdCorrections:
             # ratio between HG and MG per pixel per mem cell used
             # for rel gain calculation
             frac_high_med_pix = pc_high_m / pc_med_m
             # average ratio between HG and MG as a function of
             # mem cell (needed for bls_stripes)
             # TODO: Per pixel would be a more optimal correction
             frac_high_med = pc_high_med / pc_med_med
             # calculate additional medium-gain offset
             md_additional_offset = pc_high_l - pc_med_l * pc_high_m / pc_med_m

+            # Calculate relative gain. If FF constants are available,
+            # use them for high gain; if not, rel_gain is calculated
+            # using PC data only.
+            # if self.corr_bools.get("xray_corr"):
+            #     rel_gain[..., :self.max_cells, 0] /= xray_corr

             # PC data should be 'calibrated' with X-ray data;
             # if that is not done, it is better to use 1 instead of biasing
             # the results with PC artefacts.
@@ -1078,17 +1068,35 @@ class AgipdCorrections:
         dname = device.device_name
         cons_data = dict()
         when = dict()

         for cname, mdata in const_yaml[dname].items():
             when[cname] = mdata["creation-time"]
             if when[cname]:
-                cf = h5py.File(mdata["file-path"], "r")
-                cons_data[cname] = np.copy(cf[f"{dname}/{cname}/0/data"])
-                cf.close()
+                # This path is only used when testing new flat fields from
+                # file during development: it takes ages to test using all
+                # cells. Consequently, the shape needs to be fixed when
+                # fewer cells are used.
+                with h5py.File(mdata["file-path"], "r") as cf:
+                    cons_data[cname] = np.copy(cf[f"{dname}/{cname}/0/data"])
+                shape = cons_data[cname].shape  # (128, 512, mem_cells)
+                extra_dims = shape[:2] + (self.max_cells - shape[2], )
+
+                if extra_dims[-1] != 0 and cname == "BadPixelsFF":
+                    extra_temp = np.zeros(extra_dims, dtype=np.int32)
+                    cons_data[cname] = np.concatenate(
+                        (cons_data[cname], extra_temp), axis=2)
+                    print('An extra dimension was added to the constants '
+                          'for the benefit of BadPixelsFF')
+
+                if extra_dims[-1] != 0 and cname == "SlopesFF":
+                    extra_temp = np.ones(extra_dims, dtype=np.float32)
+                    cons_data[cname] = np.concatenate(
+                        (cons_data[cname], extra_temp), axis=2)
+                    print('An extra dimension was added to the constants '
+                          'for the benefit of SlopesFF')
             else:
                 # Create empty constant using the list elements
-                cons_data[cname] = \
-                    getattr(np, mdata["file-path"][0])(mdata["file-path"][1])
+                cons_data[cname] = getattr(np, mdata["file-path"][0])(mdata["file-path"][1])  # noqa

         self.init_constants(cons_data, module_idx)

         return when
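The padding logic added above can be exercised on its own; a short sketch with made-up shapes (202 cells in the file, 352 requested):

``` python
import numpy as np

max_cells = 352
# Illustrative constant read from a file holding only 202 memory cells
slopes_ff = np.random.rand(128, 512, 202).astype(np.float32)

shape = slopes_ff.shape
extra_dims = shape[:2] + (max_cells - shape[2],)
if extra_dims[-1] > 0:
    # pad SlopesFF with ones so the padded cells are a no-op correction;
    # BadPixelsFF is padded with zeros (no bad-pixel flags) instead
    slopes_ff = np.concatenate(
        (slopes_ff, np.ones(extra_dims, dtype=np.float32)), axis=2)

print(slopes_ff.shape)  # (128, 512, 352)
```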
@@ -1191,7 +1199,7 @@ class AgipdCorrections:
                                                   dtype='f4')
         self.mask[module_idx] = sharedmem.empty(constant_shape, dtype='i4')
-        self.xray_cor[module_idx] = sharedmem.empty(constant_shape[2:],
+        self.xray_cor[module_idx] = sharedmem.empty(constant_shape[1:],
                                                     dtype='f4')

     def allocate_images(self, shape, n_cores_files):
@@ -43,7 +43,7 @@ def assemble_constant_dict(corr_bools, pc_bools, memory_cells, bias_voltage,
     const_dict = {
         "Offset": ["zeros", (128, 512, memory_cells, 3), darkcond],
         "Noise": ["zeros", (128, 512, memory_cells, 3), darkcond],
-        "ThresholdsDark": ["ones", (128, 512, memory_cells, 2), darkcond],
+        "ThresholdsDark": ["ones", (128, 512, memory_cells, 5), darkcond],
         "BadPixelsDark": ["zeros", (128, 512, memory_cells, 3), darkcond],
     }
from typing import Any, Dict, List, Optional, Tuple
from iminuit import Minuit
import numpy as np
from cal_tools.enums import BadPixelsFF
def any_in(mask: np.ndarray, bits: int) -> np.ndarray:
return mask.astype(np.uint) & bits > 0
def gaussian(x: np.ndarray, norm: int = 1, mean: int = 0, sigma: int = 1) -> float: # noqa
"""
Return value of Gaussian function
:param x: Argument (float or 1D array) of Gaussian function
:param norm: Normalization of Gaussian function
:param mean: Mean parameter
:param sigma: Sigma parameter
:return: Value of gaussian function.
"""
return norm * np.exp(-1 / 2 * ((x - mean) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi)) # noqa
def gaussian_sum(x: np.ndarray, ng: int = 4, *p: Tuple[Any]) -> float:
"""Sum of ng Gaussian functions
:param x: Argument (float or 1D array) of the function
:param ng: Number of Gaussian functions
:param p: List of parameters (norm1,mean1,sigma1,norm2,mean2,sigma2,...)
"""
r = 0.
for i in range(ng):
r += gaussian(x, *p[i * 3:(i + 1) * 3])
return r
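# Usage sketch (illustrative values): evaluate a two-peak model with
# parameters flattened as (norm, mean, sigma) per peak:
#
#     x = np.linspace(-20, 80, 101)
#     y = gaussian_sum(x, 2, 100., 0., 5., 40., 35., 6.)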
def get_statistical_parameters(x: np.ndarray,
y: np.ndarray,
x_range: List) -> Tuple[np.uint64, np.float64, np.float64, np.ndarray]: # noqa
"""Return statistical parameters of selected part of a histogram.
:param x: Center of bins of the histogram
:param y: Value of bins of the histogram
:param x_range: x range to be considered
:return: Sum of histogram, Mean value, Standard deviation,
List of selected bins
"""
# TODO: Check if wq.median works better than mean
sel = (x >= x_range[0]) & (x < x_range[1])
h_sum = np.sum(y[sel])
h_norm = y[sel] / h_sum
h_mean = np.sum(h_norm * x[sel])
h_sqr = (x[sel] - h_mean) ** 2
h_std = np.sqrt(np.sum(h_norm * h_sqr))
return h_sum, h_mean, h_std, sel
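# Usage sketch (illustrative): for a histogram with bin centers xe and
# counts ye,
#
#     n, m, rms, sel = get_statistical_parameters(xe, ye, [-10., 10.])
#
# gives the number of entries in the window, the count-weighted mean and
# standard deviation, and the boolean bin selection.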
def get_starting_parameters(xe: np.ndarray,
ye: np.ndarray,
limits: np.ndarray,
n_peaks: int = 3,
f_lim: int = 2) -> Tuple[Dict[str, Any], List[Tuple]]: # noqa
"""
Estimate starting parameters for Gaussian fit of several peaks.
:param xe: Center of bins of the histogram
:param ye: Value of bins of the histogram
:param limits: Position of each peak ((left1, right1),
(left2, right2), ...) to be considered.
:param n_peaks: Number of peaks
:param f_lim: Limits in units of standard deviation to consider
"""
parameters = {}
shapes = []
for peak in range(n_peaks):
n, m, rms, idx = get_statistical_parameters(xe, ye, limits[peak])
limits2 = [m - f_lim * rms, m + f_lim * rms]
n, m, rms, idx = get_statistical_parameters(xe, ye, limits2)
shapes.append((n, m, rms, idx))
parameters.update({f'g{peak}sigma': rms,
f'g{peak}n': float(n),
f'g{peak}mean': m})
return parameters, shapes
def fit_n_peaks(x: np.ndarray,
y: np.ndarray,
pars: Dict[str, Any],
x_range: Tuple[float, float],
do_minos: Optional[bool] = False,
n_peaks: Optional[int] = 4,
fix_d01: Optional[bool] = True) -> Minuit:
"""
Fit histogram with n-Gaussian function.
:param x: Center of bins of the histogram
:param y: Value of bins of the histogram
:param pars: Dictionary of initial parameters for fitting
:param x_range: x Range to be considered for the fitting
:param do_minos: Run Minos if True
:param n_peaks: Number of Gaussian peaks to fit (min 2, max 4)
:param fix_d01: Fix position of peaks to the distance between noise and
first photon peak.
:return: minuit object
"""
sel = (x >= x_range[0]) & (x < x_range[1])
# Square of bin errors
yrr2 = np.copy(y[sel])
yrr2[yrr2 == 0] = 1 # bins with zero events have error=1
if fix_d01:
pars['fix_g2mean'] = True
pars['fix_g3mean'] = True
if n_peaks < 4:
pars['g3n'] = 0
pars['fix_g3n'] = True
pars['g3sigma'] = 1
pars['fix_g3sigma'] = True
pars['fix_g3mean'] = True
if n_peaks < 3:
pars['g2n'] = 0
pars['fix_g2n'] = True
pars['g2sigma'] = 1
pars['fix_g2sigma'] = True
pars['fix_g2mean'] = True
def chi2_f(g0n, g0mean, g0sigma,
g1n, g1mean, g1sigma,
g2n, g2mean, g2sigma,
g3n, g3mean, g3sigma, ):
d01 = (g1mean - g0mean)
if 'fix_g2mean' in pars and pars['fix_g2mean']:
g2mean = g0mean + d01 * 2
if 'fix_g3mean' in pars and pars['fix_g3mean']:
g3mean = g0mean + d01 * 3
if g3n == 0:
n_peaks = 3
elif g2n == 0:
n_peaks = 2
else:
n_peaks = 4
yt = gaussian_sum(x[sel], n_peaks,
g0n, g0mean, g0sigma,
g1n, g1mean, g1sigma,
g2n, g2mean, g2sigma,
g3n, g3mean, g3sigma)
return np.nansum((yt - y[sel]) ** 2 / yrr2)
minuit = Minuit(chi2_f, **pars, pedantic=False)
minuit.migrad()
if do_minos:
if minuit.get_fmin().is_valid:
minuit.minos()
return minuit
def set_par_limits(pars: Dict[str, Any],
peak_range: np.ndarray,
peak_norm_range: np.ndarray,
peak_width_range: np.ndarray,
n_peaks: Optional[int] = 4):
"""
Set limits on initial fit parameters based on given values
:param pars: Dictionary of initial fit parameters
:param peak_range: Limits on peak positions
:param peak_norm_range: Limits on normalization of Gaussian peaks
:param peak_width_range: Limits on width of Gaussian peaks
:param n_peaks: Number of Gaussian peaks
"""
for peak in range(n_peaks):
pars.update({f'limit_g{peak}n': peak_norm_range[peak],
f'limit_g{peak}mean': peak_range[peak],
f'limit_g{peak}sigma': peak_width_range[peak],
})
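# Usage sketch (illustrative limits; xe, ye are histogram bin centers and
# counts as above):
#
#     limits = [(-15, 15), (25, 45), (60, 80), (95, 115)]
#     pars, _ = get_starting_parameters(xe, ye, limits, n_peaks=4)
#     set_par_limits(pars, peak_range=limits,
#                    peak_norm_range=[(0, None)] * 4,
#                    peak_width_range=[(2, 20)] * 4)
#     mfit = fit_n_peaks(xe, ye, pars, x_range=(-20, 120), n_peaks=4)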
def get_mask(fit_summary: Dict[str, Any],
peak_lim: List,
d0_lim: List,
chi2_lim: List,
peak_width_lim: np.array) -> int:
"""
Calculate Bad pixels mask based on fit results and given limits
:param fit_summary: Dictionary of the fit output from Multi-Gaussian fit
:param peak_lim: Limits on noise peak position
:param d0_lim: Limits on distance between noise and first photon peak
:param chi2_lim: Limits on reduced chi^2 value
:param peak_width_lim: Limits on noise peak width
:return: Bad pixel mask
"""
if not fit_summary['is_valid']:
return BadPixelsFF.FIT_FAILED.value
m0 = fit_summary['g0mean']
s0 = fit_summary['g0sigma']
s1 = fit_summary['g1sigma']
s2 = fit_summary['g2sigma']
chi2_ndof = fit_summary['chi2_ndof']
d01 = fit_summary['g1mean'] - m0
mask = 0
if not fit_summary['is_valid']:
mask |= BadPixelsFF.FIT_FAILED.value
if not fit_summary['has_accurate_covar']:
mask |= BadPixelsFF.ACCURATE_COVAR.value
if not peak_lim[0] <= m0 <= peak_lim[1]:
mask |= BadPixelsFF.NOISE_PEAK_THRESHOLD.value
if not d0_lim[0] <= d01 <= d0_lim[1]:
mask |= BadPixelsFF.GAIN_THRESHOLD.value
if not chi2_lim[0] <= chi2_ndof <= chi2_lim[1]:
mask |= BadPixelsFF.CHI2_THRESHOLD.value
width_lim = peak_width_lim[0] * s0
inside_s1 = width_lim[0] <= s1 <= width_lim[1]
width_lim = peak_width_lim[1] * s0
inside_s2 = width_lim[0] <= s2 <= width_lim[1]
if not (inside_s1 and inside_s2):
mask |= BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
return mask
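To see how the cuts combine, a short sketch (assuming the module above is importable as cal_tools.agipdutils_ff, a hypothetical name; the fit_summary values are made up) that builds a mask and tests its bits with any_in:

``` python
import numpy as np
from cal_tools.agipdutils_ff import any_in, get_mask
from cal_tools.enums import BadPixelsFF

# Made-up fit summary for one pixel/cell
fit_summary = {
    'is_valid': True, 'has_accurate_covar': True,
    'g0mean': -0.5, 'g0sigma': 5.0,
    'g1mean': 35.0, 'g1sigma': 6.0, 'g2sigma': 7.0,
    'chi2_ndof': 1.2,
}
mask = get_mask(fit_summary,
                peak_lim=[-30, 30],
                d0_lim=[10, 80],
                chi2_lim=[0, 3.0],
                peak_width_lim=np.array([[0.9, 1.55], [0.95, 1.65]]))

print(mask)  # 0 -> this pixel passes all cuts
print(any_in(np.array([mask]), BadPixelsFF.FIT_FAILED.value))  # [False]
```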
@@ -28,6 +28,21 @@ class BadPixels(Enum):
     NON_LIN_RESPONSE_REGION = 0b100000000000000000000 # bit 21


+class BadPixelsFF(Enum):
+    """ The SlopesFF Bad Pixel Encoding
+    """
+
+    FIT_FAILED           = 0b000000000000000000001 # bit 1
+    CHI2_THRESHOLD       = 0b000000000000000000010 # bit 2
+    NOISE_PEAK_THRESHOLD = 0b000000000000000000100 # bit 3
+    GAIN_THRESHOLD       = 0b000000000000000001000 # bit 4
+    PEAK_WIDTH_THRESHOLD = 0b000000000000000010000 # bit 5
+    ACCURATE_COVAR       = 0b000000000000000100000 # bit 6
+    BAD_DARK             = 0b000000000000001000000 # bit 7
+    NO_ENTRY             = 0b000000000000010000000 # bit 8
+    GAIN_DEVIATION       = 0b000000000000100000000 # bit 9
+
+
 class SnowResolution(Enum):
     """ An Enum specifying how to resolve snowy pixels
     """
@@ -264,13 +264,15 @@ def get_dir_creation_date(directory: str, run: int,
     ntries = 100
     while ntries > 0:
         try:
-            dates = []
-            for f in directory.glob('*.h5'):
-                with h5py.File(f, 'r') as fin:
-                    cdate = fin['METADATA/creationDate'][0].decode()
-                    cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
-                    dates.append(cdate)
-            return min(dates)
+            rfiles = list(directory.glob('*.h5'))
+            rfiles.sort(key=path.getmtime)
+            # get creation time for the oldest file,
+            # as creation times between run files
+            # should differ only by a few seconds
+            with h5py.File(rfiles[0], 'r') as fin:
+                cdate = fin['METADATA/creationDate'][0].decode()
+                cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
+            return cdate
         except (IOError, ValueError):
             ntries -= 1
         except KeyError:  # The files are here, but it's an older dataset
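The rewritten logic picks the oldest file by modification time instead of opening every file in the run. A self-contained sketch of the same approach (function name hypothetical; dataset layout as in the diff):

``` python
import datetime
from os import path
from pathlib import Path

import h5py

def run_creation_date(run_dir: str) -> datetime.datetime:
    """Creation date of a run, read from its oldest HDF5 file."""
    rfiles = sorted(Path(run_dir).glob('*.h5'), key=path.getmtime)
    # Creation times of files within one run differ only by seconds,
    # so the oldest file is representative of the whole run.
    with h5py.File(rfiles[0], 'r') as fin:
        cdate = fin['METADATA/creationDate'][0].decode()
    return datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
```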
%% Cell type:markdown id: tags:

# AGIPD Offline Correction #

Author: European XFEL Detector Group, Version: 2.0

Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:

``` python
in_folder = "/gpfs/exfel/exp/HED/202031/p900174/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/hibef_agipd2" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 155 # run to process, required

karabo_id = "HED_DET_AGIPD500K2G" # karabo id
karabo_da = ['-1'] # a list of data aggregator names, default [-1] selects all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to image indices
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo id for control device
karabo_da_control = 'AGIPD500K2G00' # karabo DA for control information

slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants

use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants

max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 500, 1] # range list [start, end, step] of maximum pulse indices within a train. A maximum of 3 list elements is allowed.
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
cell_id_preview = 1 # cell Id used for preview in single-shot plots

# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity no baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for fraction of empty pixels to consider module dark enough to perform CM correction
cm_dark_range = [-50.,30] # range of signal values (ADU) for a pixel to be considered dark
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium-gain offset-subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium-gain offset-subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask complete ADC

# Correction Booleans
only_offset = False # Apply only Offset correction. if False, Offset is applied by default. if True, only Offset is applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop-offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask entire ADC if it is noisy above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain

# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel

def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:

``` python
import copy
from datetime import timedelta
from dateutil import parser
import gc
import glob
import itertools
from IPython.display import HTML, display, Markdown, Latex
import math
from multiprocessing import Pool
import os
import re
import sys
import traceback
from time import time, sleep, perf_counter

import tabulate
import warnings
warnings.filterwarnings('ignore')
import yaml

from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from extra_data import RunDirectory, stack_detector_data
from iCalibrationDB import Detectors
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from matplotlib.colors import LogNorm
from matplotlib import cm as colormap
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("agg")
%matplotlib inline
import numpy as np

import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")

from cal_tools.agipdlib import (AgipdCorrections, get_acq_rate,
                                get_gain_setting, get_num_cells)
from cal_tools.cython import agipdalgs as calgs
from cal_tools.ana_tools import get_range
from cal_tools.enums import BadPixels
from cal_tools.tools import get_dir_creation_date, map_modules_from_folder
from cal_tools.step_timing import StepTimer
```
%% Cell type:markdown id: tags:

## Evaluated parameters ##
%% Cell type:code id: tags:

``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}

# offset is at the bottom of the AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset

# Don't apply any other corrections if only_offset is requested
if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["zero_nans"] = zero_nans
    corr_bools["zero_orange"] = zero_orange
    corr_bools["mask_noisy_adc"] = mask_noisy_adc
    corr_bools["force_hg_if_below"] = force_hg_if_below
    corr_bools["force_mg_if_below"] = force_mg_if_below
    corr_bools["common_mode"] = common_mode
    corr_bools["melt_snow"] = melt_snow
    corr_bools["mask_zero_std"] = mask_zero_std
    corr_bools["low_medium_gap"] = low_medium_gap
```
%% Cell type:code id: tags:

``` python
if in_folder[-1] == "/":
    in_folder = in_folder[:-1]

if sequences[0] == -1:
    sequences = None

control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)

print(f'Path to control file {control_fname}')
```
%% Cell type:code id: tags:

``` python
# Create output folder
os.makedirs(out_folder, exist_ok=overwrite)

# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
# TODO: Remove DETLAB
elif instrument == "HED" or instrument == "DETLAB":
    dinstance = "AGIPD500K"
    nmods = 8

# Evaluate requested modules
if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
    modules = [int(x[-2:]) for x in karabo_da]

def mod_name(modno):
    return f"Q{modno // 4 + 1}M{modno % 4 + 1}"

print("Process modules: ", ', '.join([mod_name(x) for x in modules]))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:

``` python
# Display information about the selected pulse indices for correction.
pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses

try:
    if len(pulses_lst) > 1:
        print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
              .format(len(pulses_lst), pulses_lst[0], pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
                      pulses_lst[1] - pulses_lst[0]))
    else:
        print("One pulse is selected: pulse index {}".format(pulses_lst[0]))
except Exception as e:
    raise ValueError('max_pulses input Error: {}'.format(e))
```
%% Cell type:code id: tags:

``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template,
                              karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
file_list = []

# TODO: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i, f))
        i += 1
        ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:

``` python
filename = file_list[0]
channel = int(re.findall(r".*-AGIPD([0-9]+)-.*", filename)[0])

# Evaluate number of memory cells
mem_cells = get_num_cells(filename, karabo_id, channel)
if mem_cells is None:
    raise ValueError(f"No raw images found in {filename}")

mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
max_cells = mem_cells if max_cells == 0 else max_cells

# Evaluate acquisition rate
if acq_rate == 0:
    acq_rate = get_acq_rate((filename, karabo_id, channel))

print(f"Maximum memory cells to calibrate: {max_cells}")
```
%% Cell type:code id: tags:

``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    offset = parser.parse(creation_date_offset)
    delta = timedelta(hours=offset.hour,
                      minutes=offset.minute, seconds=offset.second)
    creation_time += delta

# Evaluate gain setting
if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'ERROR: while reading gain setting from: \n{control_fname}')
            print(e)
            print("Set gain setting to 0")
            gain_setting = 0
```
%% Cell type:code id: tags:

``` python
print(f"Using {creation_time} as creation time")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells_db}\n"
      f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Photon Energy: {photon_energy}\n")
```
%% Cell type:markdown id: tags:

## Data processing ##
%% Cell type:code id: tags:

``` python
agipd_corr = AgipdCorrections(max_cells, max_pulses,
                              h5_data_path=h5path,
                              h5_index_path=h5path_idx,
                              corr_bools=corr_bools)

agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold

agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
```
%% Cell type:code id: tags:

``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))

const_yaml = None
if os.path.isfile(f'{out_folder}/retrieved_constants.yml'):
    with open(f'{out_folder}/retrieved_constants.yml', "r") as f:
        const_yaml = yaml.safe_load(f.read())

# retrieve constants
def retrieve_constants(mod):
    """
    Retrieve calibration constants and load them to shared memory

    Metadata for constants is taken from the yml file or retrieved from the DB
    """
    device = getattr(getattr(Detectors, dinstance), mod_name(mod))
    err = ''
    try:
        # check if there is a yaml file in out_folder that has the device constants.
        if const_yaml and device.device_name in const_yaml:
            when = agipd_corr.initialize_from_yaml(const_yaml, mod, device)
        else:
            when = agipd_corr.initialize_from_db(cal_db_interface, creation_time, mem_cells_db, bias_voltage,
                                                 photon_energy, gain_setting, acq_rate, mod, device, False)
    except Exception as e:
        err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
        when = None
    return err, mod, when, device.device_name

ts = perf_counter()
with Pool(processes=len(modules)) as pool:
    const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:

``` python
# allocate memory for images and hists
n_images_max = max_cells*256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:

``` python
def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements"""
    start = 0
    while start < len(l):
        yield l[start:start + batch_size]
        start += batch_size
```
%% Cell type:code id: tags:

``` python
def imagewise_chunks(img_counts):
    """Break up the loaded data into chunks of up to chunk_size

    Yields (file data slot, start index, stop index)
    """
    for i_proc, n_img in enumerate(img_counts):
        n_chunks = math.ceil(n_img / chunk_size)
        for i in range(n_chunks):
            yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
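The integer arithmetic `i * n_img // n_chunks` splits each file's images into near-equal chunks that never exceed chunk_size; a quick check of the boundaries it yields, using the chunk_size set in the parameter cell above:

``` python
import math

chunk_size = 1000  # as in the parameter cell above
n_img = 2500
n_chunks = math.ceil(n_img / chunk_size)  # 3
bounds = [(i * n_img // n_chunks, (i + 1) * n_img // n_chunks)
          for i in range(n_chunks)]
print(bounds)  # [(0, 833), (833, 1666), (1666, 2500)]
```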
%% Cell type:code id: tags:

``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:

``` python
with Pool() as pool:
    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files:")
        for file_name in file_batch:
            print(" ", file_name)
        step_timer.start()

        img_counts = pool.starmap(agipd_corr.read_file, enumerate(file_batch))
        step_timer.done_step('Loading data from files')

        # Evaluate zero-data-std mask
        pool.starmap(agipd_corr.mask_zero_std, itertools.product(
            range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
        ))
        step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # Perform cross-file correction parallel over ASICs
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                range(len(file_batch)), range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

        # Perform image-wise correction
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Image-wise correction")

        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, os.path.join(out_folder, os.path.basename(file_name).replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
```
%% Cell type:code id: tags:

``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:

``` python
# if there is a yml file, a leading notebook got processed
# and the reporting will be generated from it.
fst_print = True

to_store = []
line = []
for i, (error, modno, when, mod_dev) in enumerate(const_out):
    qm = mod_name(modno)
    # expose errors while applying correction
    if error:
        print("Error: {}".format(error))

    if not const_yaml or mod_dev not in const_yaml:
        if fst_print:
            print("Constants are retrieved with creation time: ")
            fst_print = False

        line = [qm]

        # if the correction did not crash, print the timestamps
        if not error:
            print(f"{qm}:")
            for key, item in when.items():
                if hasattr(item, 'strftime'):
                    item = item.strftime('%y-%m-%d %H:%M')
                when[key] = item
                print('{:.<12s}'.format(key), item)

        # Store a few timestamps if they exist
        # Add NA to keep array structure
        for key in ['Offset', 'SlopesPC', 'SlopesFF']:
            if when and key in when and when[key]:
                line.append(when[key])
            else:
                if error is not None:
                    line.append('Err')
                else:
                    line.append('NA')

        if len(line) > 0:
            to_store.append(line)

seq = sequences[0] if sequences else 0

if len(to_store) > 0:
    with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fyml:
        yaml.safe_dump({"time-summary": {f"S{seq}":to_store}}, fyml)
```
%% Cell type:code id: tags:

``` python
def do_3d_plot(data, edges, x_axis, y_axis):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T

    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm,
                           linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")


def do_2d_plot(data, edges, y_axis, x_axis):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),
              np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
                   norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
```
%% Cell type:code id: tags:

``` python
def get_trains_data(run_folder, source, include, tid=None, path='*/DET/*', modules=16, fillvalue=np.nan):
    """
    Load a single train for all modules

    :param run_folder: Path to folder with data
    :param source: Data source to be loaded
    :param include: Inset of file name to be considered
    :param tid: Train Id to be loaded. First train is considered if None is given
    :param path: Path to find image data inside h5 file
    :param modules: Number of detector modules to stack
    :param fillvalue: Fill value for missing modules
    """
    run_data = RunDirectory(run_folder, include)
    if tid:
        tid, data = run_data.select('*/DET/*', source).train_from_id(tid)
        return tid, stack_detector_data(train=data, data=source, fillvalue=fillvalue, modules=modules)
    else:
        for tid, data in run_data.select('*/DET/*', source).trains(require_all=True):
            return tid, stack_detector_data(train=data, data=source, fillvalue=fillvalue, modules=modules)
    return None, None
```
%% Cell type:code id: tags:

``` python
if dinstance == "AGIPD500K":
    geom = AGIPD_500K2GGeometry.from_origin()
else:
    geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
        (-525, 625),
        (-550, -10),
        (520, -160),
        (542.5, 475),
    ])
```
%% Cell type:code id: tags:

``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(f'{out_folder}/', 'image.data', include, modules=nmods)
_, gains = get_trains_data(f'{out_folder}/', 'image.gain', include, tid, modules=nmods)
_, mask = get_trains_data(f'{out_folder}/', 'image.mask', include, tid, modules=nmods)
_, blshift = get_trains_data(f'{out_folder}/', 'image.blShift', include, tid, modules=nmods)
_, cellId = get_trains_data(f'{out_folder}/', 'image.cellId', include, tid, modules=nmods)
_, pulseId = get_trains_data(f'{out_folder}/', 'image.pulseId', include, tid,
                             modules=nmods, fillvalue=0)
_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, tid, modules=nmods)
```
%% Cell type:code id: tags:

``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```

%% Cell type:markdown id: tags:

### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:

``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
                                         raw[:,1,...].flatten().astype(np.float32),
                                         bins=(100, 100),
                                         range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
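%% Cell type:markdown id: tags:

The gain bit shown in the next section is derived from the analogue gain channel plotted above. As a minimal sketch (not this notebook's actual code, which relies on the dark-derived threshold constants), two per-pixel thresholds split the analogue gain axis into the three gain stages; the threshold values below are hypothetical:

%% Cell type:code id: tags:

``` python
import numpy as np

t0, t1 = 5500.0, 7000.0                        # hypothetical thresholds (ADU)
analogue = np.array([5000.0, 6000.0, 7500.0])  # example analogue gain readings
gain_bit = np.zeros_like(analogue, dtype=np.uint8)
gain_bit[analogue > t0] = 1                    # medium gain
gain_bit[analogue > t1] = 2                    # low gain
print(gain_bit)                                # -> [0 1 2]
```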
%% Cell type:markdown id: tags:

### Signal vs. Digitized Gain ###

The following plot shows the signal vs. digitized gain

%% Cell type:code id: tags:

``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
                                         gains.flatten().astype(np.float32), bins=(100, 3),
                                         range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:

``` python
print("Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
          f'{gains[gains==1].size/gains.size*100:.03f}',
          f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["High", "Medium", "Low"])))
```
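%% Cell type:markdown id: tags:

An equivalent single-pass way to obtain the same percentages, sketched here for reference (it assumes `gains` contains no NaN entries, i.e. no module slots were filled with the fill value):

%% Cell type:code id: tags:

``` python
counts = np.bincount(gains.astype(np.int64).ravel(), minlength=3)
print(counts / gains.size * 100)  # percentages for high, medium, low gain
```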
%% Cell type:markdown id: tags:

### Intensity per Pulse ###

%% Cell type:code id: tags:

``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
mean_data = np.nanmean(corrected, axis=(2, 3))

hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                         pulseId.flatten().astype(np.float32),
                                         bins=(100, int(pulse_range[1])),
                                         range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")

hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                         pulseId.flatten().astype(np.float32),
                                         bins=(100, int(pulse_range[1])),
                                         range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:

### Baseline shift ###

Estimated baseline shift with respect to the total ADU counts of the corrected image.

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU]')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:

``` python
display(Markdown('### Raw preview ###\n'))
display(Markdown('Mean over images of the RAW data\n'))
```

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[:, 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```

%% Cell type:code id: tags:

``` python
display(Markdown(f'Single shot of the RAW data from cell {np.max(cellId[cell_id_preview])} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_id_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_id_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:

``` python
display(Markdown('### Corrected preview ###\n'))
display(Markdown(f'A single shot image from cell {np.max(cellId[cell_id_preview])} \n'))
```

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 7, -50)
vmin = -50
ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 5, -50)
nbins = int((vmax + 50) / 2)
h = ax.hist(corrected[cell_id_preview].flatten(),
            bins=nbins, range=(-50, vmax),
            histtype='stepfilled', log=True)
_ = plt.xlabel('[ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:

``` python
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown('A mean across one train \n'))
```

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
```
%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
    vmax = 50000
nbins = int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
            range=(-100, vmax), histtype='step', log=True, label='All')
_ = ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
            alpha=0.5, log=True, label='High gain', color='green')
_ = ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
            alpha=0.5, log=True, label='Medium gain', color='red')
_ = ax.hist(corrected[gains == 2].flatten(), bins=nbins,
            range=(-100, vmax), alpha=0.5, log=True, label='Low gain', color='yellow')
_ = ax.legend()
_ = ax.grid()
_ = plt.xlabel('[ADU]')
_ = plt.ylabel('Counts')
```
%% Cell type:code id: tags:

``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown('The per-pixel maximum across one train for the digitized gain'))
```

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
                         cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:

## Bad Pixels ##

The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:

%% Cell type:code id: tags:

``` python
table = []
for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Bad pixel type", "Bit mask"])))
```
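%% Cell type:markdown id: tags:

Since a mask entry is the bitwise OR of the flags above, a single value can be decoded back into the contributing bad-pixel types. A small sketch with a hypothetical entry:

%% Cell type:code id: tags:

``` python
value = 0b101  # hypothetical mask entry with two bits set
for item in BadPixels:
    if value & item.value:
        print(item.name)
```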
%% Cell type:code id: tags:

``` python
display(Markdown('### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {np.max(cellId[cell_id_preview])} \n'))
```

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:

### Percentage of Bad Pixels across one train ###

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.mean(mask>0, axis=0),
                         vmin=0, ax=ax, vmax=1, cmap="jet")
```

%% Cell type:markdown id: tags:

### Percentage of Bad Pixels across one train. Only Dark Related ###

%% Cell type:code id: tags:

``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
# zero out entries with bits beyond NO_DARK_DATA, keeping (approximately)
# only the dark-related bad-pixel contributions
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
                         vmin=0, ax=ax, vmax=1, cmap="jet")
```
...
%% Cell type:markdown id: tags:

# AGIPD Retrieving Constants Pre-correction #

Author: European XFEL Detector Group, Version: 1.0

Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:

``` python
cluster_profile = "noDB"
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # runs to process, required

karabo_id = "SPB_DET_AGIPD1M-1" # karabo id
karabo_da = ['-1'] # a list of data aggregator names, default [-1] for selecting all data aggregators
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for the control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information

use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to the creation date, e.g. to get different constants
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants

calfile = "" # path to calibration file. Leave empty if all data should come from DB
nodb = False # if set, only file-based constants will be used
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
max_cells_db_dark = 0 # set to a value different than 0 to use this value for dark data DB queries
max_cells_db = 0 # set to a value different than 0 to use this value for DB queries

# Correction Booleans
only_offset = False # apply only the offset correction; if False, the offset is applied by default; if True, only the offset is applied
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on X-ray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline correction via stripes is attempted
blc_hmatch = False # if set, baseline correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match the highest high gain value
```
%% Cell type:code id: tags:

``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}

# offset is at the bottom of the AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset

# Don't apply any other corrections if only_offset is requested
if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_hmatch"] = blc_hmatch
```
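%% Cell type:markdown id: tags:

A short illustration of the hierarchy encoded above: with `only_offset = True` none of the other keys are ever set, so the downstream `corr_bools.get(...)` checks all come back falsy and only the offset correction runs:

%% Cell type:code id: tags:

``` python
demo = {"only_offset": True}  # hypothetical stand-in for corr_bools
print(demo.get("xray_corr"))  # -> None, i.e. no X-ray gain correction
```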
%% Cell type:code id: tags:

``` python
import sys
from collections import OrderedDict

import os
import h5py
import numpy as np
import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
import multiprocessing as mp

from iCalibrationDB import Constants, Conditions, Detectors
from cal_tools.tools import (map_modules_from_folder, get_dir_creation_date)
from cal_tools.agipdlib import get_gain_setting
from dateutil import parser
from datetime import timedelta
```
%% Cell type:code id: tags:

``` python
max_cells = mem_cells

creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    offset = parser.parse(creation_date_offset)
    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
    creation_time += delta
    print(f"Using {creation_time} as creation time")

if sequences[0] == -1:
    sequences = None

if in_folder[-1] == "/":
    in_folder = in_folder[:-1]
print(f"Outputting to {out_folder}")

os.makedirs(out_folder, exist_ok=True)

import warnings
warnings.filterwarnings('ignore')

from cal_tools.agipdlib import SnowResolution
melt_snow = False if corr_bools["only_offset"] else SnowResolution.NONE
```
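%% Cell type:markdown id: tags:

For reference, a sketch of how `creation_date_offset` is applied above: the string is parsed as a time of day and converted into a `timedelta` that is added to the directory creation time:

%% Cell type:code id: tags:

``` python
from datetime import timedelta
from dateutil import parser

offset = parser.parse("01:30:00")  # hypothetical creation_date_offset
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
print(delta)                       # -> 1:30:00
```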
%% Cell type:code id: tags:

``` python
control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)

if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'ERROR: while reading gain setting from: \n{control_fname}')
            print(e)
            print("Set gain setting to 0")
            gain_setting = 0
print(f"Gain setting: {gain_setting}")
print(f"Detector in use is {karabo_id}")

# Extracting Instrument string
instrument = karabo_id.split("_")[0]

# Evaluate detector instance for mapping
if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
# TODO: Remove DETLAB
elif instrument == "HED" or instrument == "DETLAB":
    dinstance = "AGIPD500K"
    nmods = 8

print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")

if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
    modules = [int(x[-2:]) for x in karabo_da]
```
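%% Cell type:markdown id: tags:

The module indices map to the quadrant/module names used below as `Q(i//4 + 1)M(i%4 + 1)`; a quick illustration:

%% Cell type:code id: tags:

``` python
for i in [0, 5, 15]:
    print(i, "->", f"Q{i//4+1}M{i%4+1}")  # 0 -> Q1M1, 5 -> Q2M2, 15 -> Q4M4
```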
%% Cell type:code id: tags:

``` python
# set everything up filewise
print("Checking the files before retrieving constants")
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
```

%% Cell type:markdown id: tags:

## Retrieve Constants ##
%% Cell type:code id: tags:

``` python
from functools import partial
import yaml

def retrieve_constants(karabo_id, bias_voltage, max_cells, acq_rate,
                       gain_setting, photon_energy, only_dark, nodb_with_dark,
                       cal_db_interface, creation_time,
                       corr_bools, pc_bools, inp):
    """
    Retrieve the constants for each module in parallel and produce a dictionary
    with the creation time and constant file path.

    :param karabo_id: (STR) Karabo ID
    :param bias_voltage: (FLOAT) Bias Voltage
    :param max_cells: (INT) Memory cells
    :param acq_rate: (FLOAT) Acquisition Rate
    :param gain_setting: (FLOAT) Gain setting
    :param photon_energy: (FLOAT) Photon Energy
    :param only_dark: (BOOL) only retrieve dark constants
    :param nodb_with_dark: (BOOL) no constant retrieval even for dark
    :param cal_db_interface: (STR) the database interface port
    :param creation_time: (STR) raw data creation time
    :param corr_bools: (DICT) A dictionary with bools for applying requested corrections
    :param pc_bools: (LIST) list of bools to retrieve pulse capacitor constants
    :param inp: (LIST) input for the worker pool of the partial function
    :return: (qm, mdata_dict, dev.device_name, acq_rate, max_cells, err)
             mdata_dict: (DICT) dictionary with the metadata for the retrieved constants
             dev.device_name: (STR) device name
    """
    import numpy as np
    import sys
    import traceback

    from cal_tools.agipdlib import get_num_cells, get_acq_rate
    from cal_tools.agipdutils import assemble_constant_dict
    from cal_tools.tools import get_from_db
    from iCalibrationDB import Constants, Conditions, Detectors

    err = None
    qm_files, qm, dev, idx = inp
    # get the number of memory cells from a sequence file with image data
    for f in qm_files:
        if not max_cells:
            max_cells = get_num_cells(f, karabo_id, idx)
        if max_cells is None:
            if f != qm_files[-1]:
                continue
            else:
                raise ValueError(f"No raw images found for {qm} for all sequences")
        else:
            cells = np.arange(max_cells)
            # get out of the loop,
            # if max_cells is successfully calculated.
            break

    if acq_rate == 0.:
        acq_rate = get_acq_rate((f, karabo_id, idx))

    # avoid retrieving constants, if requested.
    if not nodb_with_dark:
        const_dict = assemble_constant_dict(corr_bools, pc_bools, max_cells, bias_voltage,
                                            gain_setting, acq_rate, photon_energy,
                                            beam_energy=None, only_dark=only_dark)

        # Retrieve multiple constants through an input dictionary
        # to return a dict of useful metadata.
        mdata_dict = dict()
        for cname, cval in const_dict.items():
            # saving metadata in a dict
            mdata_dict[cname] = dict()

            if slopes_ff_from_files and cname in ["SlopesFF", "BadPixelsFF"]:
                mdata_dict[cname]["file-path"] = f"{slopes_ff_from_files}/slopesff_bpmask_module_{qm}.h5"
                mdata_dict[cname]["creation-time"] = "00:00:00"
            else:
                try:
                    condition = getattr(Conditions, cval[2][0]).AGIPD(**cval[2][1])
                    co, mdata = \
                        get_from_db(dev, getattr(Constants.AGIPD, cname)(),
                                    condition, getattr(np, cval[0])(cval[1]),
                                    cal_db_interface, creation_time, meta_only=True, verbosity=0)
                    mdata_const = mdata.calibration_constant_version

                    # check if the constant was successfully retrieved.
                    if mdata.comm_db_success:
                        mdata_dict[cname]["file-path"] = f"{mdata_const.hdf5path}" \
                                                         f"{mdata_const.filename}"
                        mdata_dict[cname]["creation-time"] = f"{mdata_const.begin_at}"
                    else:
                        mdata_dict[cname]["file-path"] = const_dict[cname][:2]
                        mdata_dict[cname]["creation-time"] = None
                except Exception as e:
                    err = f"Error: {e}, Traceback: {traceback.format_exc()}"
                    print(err)

    return qm, mdata_dict, dev.device_name, acq_rate, max_cells, err

pc_bools = [corr_bools.get("rel_gain"),
            corr_bools.get("adjust_mg_baseline"),
            corr_bools.get('blc_noise'),
            corr_bools.get('blc_hmatch'),
            corr_bools.get('blc_stripes'),
            melt_snow]

inp = []
only_dark = False
nodb_with_dark = False
if not nodb:
    only_dark = (calfile != "")
if calfile != "" and not corr_bools["only_offset"]:
    nodb_with_dark = nodb

# A dict to connect virtual device
# to actual device name.
for i in modules:
    qm = f"Q{i//4+1}M{i%4+1}"
    if qm in mapped_files and not mapped_files[qm].empty():
        device = getattr(getattr(Detectors, dinstance), qm)
        qm_files = [str(mapped_files[qm].get()) for _ in range(mapped_files[qm].qsize())]
    else:
        print(f"Skipping {qm}")
        continue

    inp.append((qm_files, qm, device, i))

p = partial(retrieve_constants, karabo_id, bias_voltage, max_cells,
            acq_rate, gain_setting, photon_energy, only_dark, nodb_with_dark,
            cal_db_interface, creation_time,
            corr_bools, pc_bools)

with mp.Pool(processes=16) as pool:
    results = pool.map(p, inp)

mod_dev = dict()
mdata_dict = dict()
for r in results:
    if r:
        qm, md_dict, dname, acq_rate, max_cells, err = r
        mod_dev[dname] = {"mod": qm, "err": err}
        if err:
            print(f"Error for module {qm}: {err}")
        mdata_dict[dname] = md_dict

# check if it is requested not to retrieve any constants from the database
if not nodb_with_dark:
    with open(f"{out_folder}/retrieved_constants.yml", "w") as outfile:
        yaml.safe_dump(mdata_dict, outfile)
    print("\nRetrieved constants for modules: ",
          f"{[', '.join([f'Q{x//4+1}M{x%4+1}' for x in modules])]}")
    print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {max_cells}\n"
          f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Photon Energy: {photon_energy}\n")
    print("Constant metadata is saved in retrieved_constants.yml\n")
else:
    print("No constants were retrieved as calibrated files will be used.")
```
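%% Cell type:markdown id: tags:

For orientation, `retrieved_constants.yml` ends up with one entry per device, each mapping constant names to a file path and creation time. A hypothetical sketch of the structure (all names and timestamps below are made up):

%% Cell type:code id: tags:

``` python
example = {
    "AGIPD_EXAMPLE_DEVICE": {                      # hypothetical device name
        "Offset": {"file-path": "/gpfs/.../constants.h5",
                   "creation-time": "2020-01-01 00:00"},
    },
}
```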
%% Cell type:code id: tags:

``` python
print("Constants are retrieved with creation time: ")
i = 0
when = dict()
to_store = []

for dname, dinfo in mod_dev.items():
    print(dinfo["mod"], ":")
    line = [dinfo["mod"]]
    if dname in mdata_dict:
        for cname, mdata in mdata_dict[dname].items():
            if hasattr(mdata["creation-time"], 'strftime'):
                mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
            print(f'{cname:.<12s}', mdata["creation-time"])

    # Store a few timestamps if they exist
    # Add NA to keep the array structure
    for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
        if dname not in mdata_dict or dinfo["err"]:
            line.append('Err')
        else:
            if cname in mdata_dict[dname]:
                if mdata_dict[dname][cname]["creation-time"]:
                    line.append(mdata_dict[dname][cname]["creation-time"])
                else:
                    line.append('NA')
            else:
                line.append('NA')

    to_store.append(line)
    i += 1

if sequences:
    seq_num = sequences[0]
else:
    # if sequences[0] changed to None as it was -1
    seq_num = 0

with open(f"{out_folder}/retrieved_constants.yml", "r") as fyml:
    time_summary = yaml.safe_load(fyml)

time_summary.update({"time-summary": {
    "SAll": to_store
}})

with open(f"{out_folder}/retrieved_constants.yml", "w") as fyml:
    yaml.safe_dump(time_summary, fyml)
```
...
%% Cell type:markdown id: tags:

# Gain Characterization #

The following code characterizes the gain of the AGIPD detector from flat-field data, i.e. data with X-rays of defined intensity. Such data should fulfil the following requirements:

* the intensity should be such that single-photon peaks are visible
* data for all modules should be present
* no shadowing should occur on any of the modules
* each pixel should have at minimum around 100 events per photon peak per memory cell
* if central beam edges are visible, they should not be significantly more intense

Characterization is done by a weighted-average algorithm, which evaluates the peak locations for all pixels and memory cells of a given module. These locations are then fitted to a linear function of the average peak location in each module, such that it yields a relative gain correction.
%% Cell type:code id: tags:

``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to read histograms from, required
out_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to output to, required
hist_file_template = "hists_m{:02d}_sum.h5" # the template to use to access histograms
modules = [10] # modules to correct, set to -1 for all, range allowed

image_data_path = "/gpfs/exfel/exp/MID/202030/p900137/raw" # Path to image data used to create histograms
run = 449 # the run number of the image data used to create histograms
karabo_id = "MID_DET_AGIPD1M-1" # karabo id
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for the control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information

local_output = True # output constants locally
db_output = False # output constants to database
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use

# Fit parameters
peak_range = [-30, 30, 35, 70, 95, 135, 145, 220] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
peak_norm_range = [0.0, -1, 0, -1, 0, -1, 0, -1] #

# Bad-pixel thresholds (gain evaluation error). Contribute to the BadPixel bit "Gain_Evaluation_Error"
peak_lim = [-30, 30] # limits on the position of the noise peak
d0_lim = [10, 80] # hard limits for the distance between the noise peak and the first photon peak
peak_width_lim = [0.9, 1.55, 0.95, 1.65] # hard limits on the peak widths for the first and second peak, in units of the noise peak. 4 parameters.
chi2_lim = [0, 3.0] # hard limit on the chi2/nDOF value

intensity_lim = 15 # threshold on the standard deviation of a histogram in ADU. Contributes to the BadPixel bit "No_Entry"
gain_lim = [0.85, 1.15] # threshold on the gain in relative numbers. Contributes to the BadPixel bit "Gain_deviation"

cell_range = [1, 3] # range of cells to be considered, [0,0] for all
pixel_range = [0, 0, 32, 32] # range of pixels x1,y1,x2,y2 to consider, [0,0,512,128] for all
max_bins = 0 # maximum number of bins to consider, 0 for all bins
batch_size = [1, 8, 8] # batch size: [cell,x,y]
fit_range = [0, 0] # range of a histogram considered for fitting, in ADU. Dynamically evaluated in case of [0,0]
n_peaks_fit = 4 # number of Gaussian peaks to fit, including the noise peak
fix_peaks = False # fix the distance between photon peaks
do_minos = False # additional feature of Minuit to evaluate errors

# Detector conditions
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 8.05 # photon energy in keV
```
%% Cell type:code id: tags:

``` python
import glob
from multiprocessing import Pool
import os
import traceback
import warnings

import h5py
from iminuit import Minuit
import matplotlib.pyplot as plt
import numpy as np
import sharedmem
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot

from cal_tools.ana_tools import save_dict_to_hdf5, get_range
from cal_tools.agipdlib import get_bias_voltage
from cal_tools.agipdutils_ff import (
    any_in, gaussian, gaussian_sum, get_mask,
    get_starting_parameters, set_par_limits, fit_n_peaks
)
from cal_tools.enums import BadPixelsFF

# %load_ext autotime
%matplotlib inline
warnings.filterwarnings('ignore')
```
%% Cell type:code id: tags:

``` python
peak_range = np.reshape(peak_range, (4, 2))
peak_width_range = np.reshape(peak_width_range, (4, 2))
peak_width_lim = np.reshape(peak_width_lim, (2, 2))
peak_norm_range = [None if x == -1 else x for x in peak_norm_range]
peak_norm_range = np.reshape(peak_norm_range, (4, 2))
module = modules[0]
```
%% Cell type:code id: tags:

``` python
if bias_voltage == 0:
    # Read the bias voltage from the files, if recorded.
    # If not available, make use of the historical voltage the detector is run at
    control_filename = f'{image_data_path}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
    bias_voltage = get_bias_voltage(control_filename, karabo_id_control)
    bias_voltage = bias_voltage if bias_voltage is not None else 300
print(f"Bias voltage: {bias_voltage}V")
```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from dateutil import parser def idx_gen(batch_start, batch_size):
offset_g = {} """
noise_g = {} This generator iterate across pixels and memory cells starting
thresholds_g = {} from batch_start until batch_start+batch_size
pc_g = {} """
if not db_input: for c_idx in range(batch_start[0], batch_start[0]+batch_size[0]):
print("Offset, noise and thresholds have been read in from: {}".format(offset_store)) for x_idx in range(batch_start[1], batch_start[1]+batch_size[1]):
store_file = h5py.File(offset_store, "r") for y_idx in range(batch_start[2], batch_start[2]+batch_size[2]):
for i in modules: yield(c_idx, x_idx, y_idx)
qm = "Q{}M{}".format(i//4+1, i%4+1)
offset_g[qm] = np.array(store_file["{}/Offset/0/data".format(qm)])
noise_g[qm] = np.array(store_file["{}/Noise/0/data".format(qm)])
thresholds_g[qm] = np.array(store_file["{}/Threshold/0/data".format(qm)])
store_file.close()
else:
print("Offset, noise and thresholds have been read in from calibration database:")
first_ex = True
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
metadata = ConstantMetaData()
offset = Constants.AGIPD.Offset()
metadata.calibration_constant = offset
det = getattr(Detectors, dinstance)
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
# specify the a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
offset_g[qm] = offset.data
if first_ex:
print("Offset map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
noise = Constants.AGIPD.Noise()
metadata.calibration_constant = noise
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
# specify the a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
noise_g[qm] = noise.data
if first_ex:
print("Noise map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
thresholds = Constants.AGIPD.ThresholdsDark()
metadata.calibration_constant = thresholds
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
thresholds_g[qm] = thresholds.data
if first_ex:
print("Threshold map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
pc = Constants.AGIPD.SlopesPC()
metadata.calibration_constant = pc
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
pc_g[qm] = np.nanmedian(pc.data[0,...])/pc.data
if first_ex:
print("PC map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
first_ex = False
```
%% Cell type:markdown id: tags:
## Initial peak estimates ##
First, initial peak estimates need to be defined. This is done by inspecting histograms created from (a subset of) the offset-corrected data for peak locations and peak heights.
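A minimal sketch (not part of the original notebook) of extracting such estimates automatically from a 1D histogram with `scipy.signal.find_peaks`; `h` and `c` are illustrative stand-ins for the histogram counts and bin centres produced below:
%% Cell type:code id: tags:
``` python
import numpy as np
from scipy.signal import find_peaks

# a minimal sketch, assuming `h` holds histogram counts and `c` the bin centres
h = np.array([0, 5, 40, 8, 2, 30, 6, 1, 12, 3, 0], dtype=float)
c = np.arange(h.size, dtype=float) * 10.0

idx, props = find_peaks(h, height=1)          # candidate peak positions
order = np.argsort(props['peak_heights'])[::-1]
peak_estimates = c[idx][order]                # locations, strongest first
peak_heights = props['peak_heights'][order]
print(peak_estimates, peak_heights)
```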
%% Cell type:code id: tags:
``` python
def hist_single_module(fbase, runs, sequences, sensor_size, memory_cells, block_size,
                       il_mode, limit_trains, profile, rawversion, instrument, inp):
    """ This function calculates a per-pixel histogram for a single module
    Runs and sequences give the data to calculate histogram from
    """
    channel, offset, thresholds, pc, noise = inp
    import XFELDetAna.xfelpycaltools as xcal
    import numpy as np
import h5py
from XFELDetAna.util import env
env.iprofile = profile
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
        This is done via a continuous wavelet transform (CWT), using a Ricker
        wavelet.
        Only high gain data is evaluated, and data larger than 50 ADU,
        equivalent of roughly a 9 keV photon, is ignored.
        Two passes are executed: the first shift is accurate to 10 ADU and
        will catch baseline shifts smaller than -2000 ADU. CWT is evaluated
        for peaks of widths one, three and five times the noise.
        The baseline is then shifted by the position of the summed CWT maximum.
        In a second pass the histogram is evaluated within a range of
        +- 5*noise of the initial shift value, for a peak of width noise.
        Baseline shifts larger than the maximum threshold or positive shifts
        larger than 10 are discarded and the original data is returned; otherwise
the shift corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
        try:
            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
except:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large a shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
    # function needs to be inline for parallel processing
def read_fun(filename, channel):
""" A reader function used by pyDetLib
"""
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
        im = np.rollaxis(im, 2)
        im = np.rollaxis(im, 2, 1)
        ga = np.rollaxis(ga, 2)
        ga = np.rollaxis(ga, 2, 1)
        return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size,
offset,
nCells=memory_cells,
blockSize=block_size,
gains=[0,1,2])
offset_cor.mapper = offset_cor._view.map_sync
offset_cor.debug() # force non-parallel processing since outer function will run concurrently
hist_calc = xcal.HistogramCalculator(sensor_size,
bins=4000,
range=(-4000, 8000),
blockSize=block_size)
hist_calc.mapper = hist_calc._view.map_sync
hist_calc.debug() # force non-parallel processing since outer function will run concurrently
for run in runs:
for seq in sequences:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
# we need to do proper gain thresholding
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
hist_calc.fill(d)
h, e, c, _ = hist_calc.get()
return h, e, c
inp = []
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], pc_g[qm][0,...], noise_g[qm][...,0]))
p = partial(hist_single_module, fbase, runs, sequences,
sensor_size, memory_cells, block_size, IL_MODE, limit_trains, profile, rawversion, instrument)
res_uncorr = list(map(p, inp))
```
%% Cell type:code id: tags:
``` python
n_pixels_x = pixel_range[2]-pixel_range[0]
n_pixels_y = pixel_range[3]-pixel_range[1]

hist_data = {}
with h5py.File(f"{in_folder}/{hist_file_template.format(module)}", 'r') as hf:
    hist_data['cellId'] = np.array(hf['cellId'][()])
    hist_data['hRange'] = np.array(hf['hRange'][()])
    hist_data['nBins'] = np.array(hf['nBins'][()])

    if cell_range == [0, 0]:
        cell_range[1] = hist_data['cellId'].shape[0]
    if max_bins == 0:
        max_bins = hist_data['nBins']

    hist_data['cellId'] = hist_data['cellId'][cell_range[0]:cell_range[1]]
    hist_data['hist'] = np.array(hf['hist'][cell_range[0]:cell_range[1], :max_bins, :])

n_cells = cell_range[1]-cell_range[0]
hist_data['hist'] = hist_data['hist'].reshape(n_cells, max_bins, 512, 128)
hist_data['hist'] = hist_data['hist'][:, :, pixel_range[0]:pixel_range[2], pixel_range[1]:pixel_range[3]]

print(f'Data shape {hist_data["hist"].shape}')

bin_edges = np.linspace(hist_data['hRange'][0], hist_data['hRange'][1], int(hist_data['nBins']+1))
x = (bin_edges[1:] + bin_edges[:-1])[:max_bins] * 0.5

batches = []
for c_idx in range(0, n_cells, batch_size[0]):
    for x_idx in range(0, n_pixels_x, batch_size[1]):
        for y_idx in range(0, n_pixels_y, batch_size[2]):
            batches.append([c_idx, x_idx, y_idx])

print(f'Number of batches {len(batches)}')
```
%% Cell type:code id: tags:
``` python
d = []
qms = []
for i, r in enumerate(res_uncorr):
    ii = list(modules)[i]
    qm = "Q{}M{}".format(ii//4+1, ii%4+1)
    qms.append(qm)
    h, e, c = r
    d.append({
'x': c,
'y': h,
'drawstyle': 'steps-mid'
})
fig = xana.simplePlot(d, y_log=False,
figsize="2col",
aspect=2,
x_range=(-50, 500),
x_label="Intensity (ADU)",
y_label="Counts")
fig.savefig("{}/FF_module_{}_peak_pos.png".format(out_folder, ",".join(qms)))
```
%% Cell type:code id: tags:
``` python
# these should be quite stable
peak_estimates = [0, 55, 105, 155]
peak_heights = [5e7, 5e6, 1e6, 5e5]
peak_sigma = [5., 5., 5., 5.]

print("Using the following peak estimates: {}".format(peak_estimates))
```
%% Cell type:code id: tags:
``` python
def fit_batch(batch_start):
    current_result = {}
    prev = None
    for c_idx, x_idx, y_idx in idx_gen(batch_start, batch_size):
        try:
            y = hist_data['hist'][c_idx, :, x_idx, y_idx]

            if prev is None:
                prev, _ = get_starting_parameters(x, y, peak_range, n_peaks=n_peaks_fit)

            if fit_range == [0, 0]:
                frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
                          prev[f'g{n_peaks_fit-1}mean'] + prev[f'g{n_peaks_fit-1}sigma'])
            else:
                frange = fit_range

            set_par_limits(prev, peak_range, peak_norm_range,
                           peak_width_range, n_peaks_fit)
            minuit = fit_n_peaks(x, y, prev, frange,
                                 do_minos=do_minos, n_peaks=n_peaks_fit,
                                 fix_d01=fix_peaks)

            ndof = np.rint(frange[1]-frange[0])-len(minuit.args)
            current_result['chi2_ndof'] = minuit.fval/ndof
            current_result.update(minuit.fitarg)
            current_result.update(minuit.get_fmin())

            fit_result['chi2_ndof'][c_idx, x_idx, y_idx] = current_result['chi2_ndof']
            fit_result['g0mean'][c_idx, x_idx, y_idx] = current_result['g0mean']
            fit_result['g1mean'][c_idx, x_idx, y_idx] = current_result['g1mean']
            fit_result['g2mean'][c_idx, x_idx, y_idx] = current_result['g2mean']
            fit_result['g3mean'][c_idx, x_idx, y_idx] = current_result['g3mean']
            for key in minuit.fitarg.keys():
                if key in fit_result:
                    fit_result[key][c_idx, x_idx, y_idx] = minuit.fitarg[key]

            fit_result['mask'][c_idx, x_idx, y_idx] = get_mask(current_result,
                                                               peak_lim,
                                                               d0_lim, chi2_lim,
                                                               peak_width_lim)
        except Exception as e:
            fit_result['mask'][c_idx, x_idx,
                               y_idx] = BadPixelsFF.FIT_FAILED.value
            print(c_idx, x_idx, y_idx, e, traceback.format_exc())

        if fit_result['mask'][c_idx, x_idx, y_idx] == 0:
            prev = minuit.fitarg
        else:
            prev = None
```
%% Cell type:markdown id: tags:
## Calculate relative gain per module ##
Using the estimates so obtained, we calculate the relative gain per module. For this we use the weighted average method implemented in pyDetLib.
%% Cell type:code id: tags:
``` python
block_size = [64, 64]
def relgain_single_module(fbase, runs, sequences, peak_estimates,
peak_heights, peak_sigma, memory_cells, sensor_size,
block_size, il_mode, profile, limit_trains_eval, rawversion, instrument, inp):
""" A function for calculated the relative gain of a single AGIPD module
"""
# import needed inline for parallel processing
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
from XFELDetAna.util import env
env.iprofile = profile
channel, offset, thresholds, noise, pc = inp
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
        This is done via a continuous wavelet transform (CWT), using a Ricker
        wavelet.
        Only high gain data is evaluated, and data larger than 50 ADU,
        equivalent of roughly a 9 keV photon, is ignored.
        Two passes are executed: the first shift is accurate to 10 ADU and
        will catch baseline shifts smaller than -2000 ADU. CWT is evaluated
        for peaks of widths one, three and five times the noise.
        The baseline is then shifted by the position of the summed CWT maximum.
        In a second pass the histogram is evaluated within a range of
        +- 5*noise of the initial shift value, for a peak of width noise.
        Baseline shifts larger than the maximum threshold or positive shifts
        larger than 10 are discarded and the original data is returned; otherwise
the shift corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
        try:
            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
        except:
            return d
        cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large a shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
    # function needs to be inline for parallel processing
def read_fun(filename, channel):
""" A reader function used by pyDetLib
"""
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
        ga = np.rollaxis(ga, 2)
        ga = np.rollaxis(ga, 2, 1)
        return im, ga, cells

    offset_cor = xcal.OffsetCorrection(sensor_size, offset, nCells=memory_cells,
                                       blockSize=block_size, gains=[0,1,2])
    offset_cor.mapper = offset_cor._view.map_sync

    rel_gain = xcal.GainMapCalculator(sensor_size,
                                      peak_estimates,
                                      peak_sigma,
                                      nCells=memory_cells,
                                      peakHeights=peak_heights,
                                      noiseMap=noise,
                                      deviationType="relative")
    rel_gain.mapper = rel_gain._view.map_sync
    for run in runs:
        for seq in sequences:
            fname = fbase.format(run, run.upper(), channel, seq)
            d, ga, c = read_fun(fname, channel)
            # we need to do proper gain thresholding
            vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
            c = c[vidx]
            d = d[:,:,vidx]
            ga = ga[:,:,vidx]
            g = np.zeros(ga.shape, np.uint8)
            g[...] = 2

            g[ga < thresholds[...,c,1]] = 1
            g[ga < thresholds[...,c,0]] = 0
            d = offset_cor.correct(d, cellTable=c, gainMap=g)
            for i in range(d.shape[2]):
                mn_noise = np.nanmean(noise[...,c[i]])
                d[...,i] = baseline_correct_via_noise(d[...,i],
                                                      mn_noise,
                                                      g[..., i])

            d *= np.moveaxis(pc[c,...], 0, 2)
            rel_gain.fill(d, cellTable=c)

    gain_map = rel_gain.get()
    gain_map_func = rel_gain.getUsingFunc(inverse=False)

    pks, stds = rel_gain.getRawPeaks()
    return gain_map, pks, stds, gain_map_func

inp = []
for i in modules:
    qm = "Q{}M{}".format(i//4+1, i%4+1)
    inp.append((i, offset_g[qm], thresholds_g[qm], noise_g[qm][...,0], pc_g[qm][0,...]))
start = datetime.now()
p = partial(relgain_single_module, fbase, runs, sequences,
            peak_estimates, peak_heights, peak_sigma, memory_cells,
            sensor_size, block_size, IL_MODE, profile, limit_trains_eval, rawversion, instrument)
res_gain = list(map(p, inp))  # don't run concurrently, as the inner functions are parallelized
duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration)
logger.send()
```
%% Cell type:markdown id: tags:
## Single fit ##
Left plot shows the starting parameters for the fit. Right plot shows the result of the fit. Errors are evaluated with minos.
%% Cell type:code id: tags:
``` python
hist = hist_data['hist'][1, :, 1, 1]
prev, shapes = get_starting_parameters(x, hist, peak_range, n_peaks=n_peaks_fit)

if fit_range == [0, 0]:
    frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
              prev[f'g3mean'] + prev[f'g3sigma'])
else:
    frange = fit_range

set_par_limits(prev, peak_range, peak_norm_range,
               peak_width_range, n_peaks=n_peaks_fit)
minuit = fit_n_peaks(x, hist, prev, frange,
                     do_minos=True, n_peaks=n_peaks_fit,
                     fix_d01=fix_peaks)

res = minuit.fitarg
err = minuit.errors
p = minuit.args
ya = np.arange(0, 1e4)
y = gaussian_sum(x, n_peaks_fit, *p)
peak_colors = ['g', 'y', 'b', 'orange']

d = [{'x': x,
      'y': hist.astype(np.float64),
      'y_err': np.sqrt(hist),
      'drawstyle': 'bars',
      'errorstyle': 'bars',
      'ecolor': 'navy',
      'errorcoarsing': 3,
      'label': 'X-ray Data'
      },
     {'x': x,
      'y': y,
      'y2': (hist-y)/np.sqrt(hist),
      'color': 'red',
      'drawstyle': 'line',
      'drawstyle2': 'steps-mid',
      'label': 'Fit'
      },
     ]

for i in range(n_peaks_fit):
    d.append({'x': x,
              'y': gaussian_sum(x, 1, res[f'g{i}n'], res[f'g{i}mean'], res[f'g{i}sigma']),
              'drawstyle': 'line',
              'color': peak_colors[i],
              })
    d.append({'x': np.full_like(ya, res[f'g{i}mean']),
              'y': ya,
              'drawstyle': 'line',
              'linestyle': 'dashed',
              'color': peak_colors[i],
              'label': f'peak {i} = {res[f"g{i}mean"]:0.1f} $ \pm $ {err[f"g{i}mean"]:0.2f} ADU'})

fig = plt.figure(figsize=(16, 7))
ax = fig.add_subplot(121)
for i, shape in enumerate(shapes):
    idx = shape[3]
    plt.errorbar(x[idx], hist[idx], np.sqrt(hist[idx]),
                 marker='+', ls='')
    yg = gaussian(x[idx], *shape[:3])
    l = f'Peak {i}: {shape[1]:0.1f} $ \pm $ {shape[2]:0.2f} ADU'
    plt.plot(x[idx], yg, label=l)
plt.grid(True)
plt.xlabel("Signal [ADU]")
plt.ylabel("Counts")
plt.legend(ncol=2)

ax2 = fig.add_subplot(122)
fig2 = xana.simplePlot(d,
                       use_axis=ax2,
                       x_label='Signal [ADU]',
                       y_label='Counts',
                       secondpanel=True, y_log=False,
                       x_range=(frange[0], frange[1]),
                       y_range=(1., np.max(hist)*1.6),
                       legend='top-left-frame-ncol2')

print(minuit.get_fmin())
minuit.print_matrix()
print(minuit.get_param_states())
```
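%% Cell type:markdown id: tags:
A minimal, self-contained sketch (not part of the original notebook) of the fitting pattern used above, i.e. chi-square minimization with `iminuit` and MINOS errors, on synthetic data; `xdata`, `ydata` and the single-Gaussian model are illustrative only, and the exact `Minuit` API depends on the installed iminuit version:
%% Cell type:code id: tags:
``` python
import numpy as np
from iminuit import Minuit

# synthetic single-peak histogram (illustrative assumption)
rng = np.random.default_rng(0)
xdata = np.arange(-30, 30, dtype=np.float64)
ydata = rng.poisson(1000 * np.exp(-0.5 * (xdata / 8.0) ** 2))

def chi2(mu, sigma, a):
    model = a * np.exp(-0.5 * ((xdata - mu) / sigma) ** 2)
    return np.sum((ydata - model) ** 2 / np.maximum(ydata, 1))

m = Minuit(chi2, mu=0.0, sigma=10.0, a=1000.0)
m.errordef = 1   # chi-square: 1-sigma errors at delta chi2 = 1
m.migrad()       # minimize
m.minos()        # asymmetric errors from the chi2 profile
print(m.values, m.merrors)
```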
%% Cell type:markdown id: tags:
Finally, we inspect the results by plotting the number of entries, peak locations and resulting gain maps for each peak. In doing so, we identify bad pixels as those having 0 entries for a peak or `nan` values for the peak location.
%% Cell type:code id: tags:
``` python
gain_m = {}
flatsff = {}
gainoff_g = {}
entries_g = {}
peaks_g = {}
mask_g = {}
cell_to_preview = 4
masks_eval_peaks = [1, 2]
global_eval_peaks = [1]
global_meds = {}
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
gain_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
gain_mdb = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
entries_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells, 5))
gainoff_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
mask_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells), np.uint32)
gainoff_g[qm] = gainoff_db
gain_m[qm] = gain_mdb
entries_g[qm] = entries_db
peaks_g[qm] = pks
    # create a mask for irregular pixels
# first bit set if first peak has nan entries
for pk in masks_eval_peaks:
mask_db[~(np.isfinite(gain_mdb) & np.isfinite(gainoff_db))] |= BadPixels.FF_GAIN_EVAL_ERROR.value
mask_db[(np.abs(1-gain_mdb/np.nanmedian(gain_mdb) > deviation_threshold) )] |= BadPixels.FF_GAIN_DEVIATION.value
# second bit set if entries are 0 for first peak
mask_db[entries[...,1] == 0] |= BadPixels.FF_NO_ENTRIES.value
zero_oc = np.count_nonzero((entries > 0).astype(np.int), axis=3)
mask_db[zero_oc <= len(peak_estimates)-2] |= BadPixels.FF_NO_ENTRIES.value
# third bit set if entries of a given adc show significant noise
stds = []
for ii in range(8):
for jj in range(8):
stds.append(np.std(entries_db[ii*16:(ii+1)*16,jj*64+2:(jj+1)*64-2,:,1], axis=(0,1)))
avg_stds = np.median(np.array(stds), axis=0)
for ii in range(8):
for jj in range(8):
std = np.std(entries_db[ii*16:(ii+1)*16,jj*64+2:(jj+1)*64-2,:,1], axis=(0,1))
if np.any(std > 5*avg_stds):
mask_db[ii*16:(ii+1)*16,jj*64:(jj+1)*64,std > 5*avg_stds] |= BadPixels.FF_GAIN_DEVIATION
mask_g[qm] = mask_db
flat = np.zeros((gains.shape[0], gains.shape[1], memory_cells, 3))
for j in range(2,len(peak_estimates)):
flat[...,j-2] = np.mean(entries[...,j]/np.mean(entries[...,j]))
flat = np.mean(flat, axis=3)
#flat_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
#for j in range(2):
# flat_db[...,j::2] = flat
flatsff[qm] = flat
global_meds[qm] = np.nanmedian(pks[...,global_eval_peaks][np.max(mask_db, axis=2) != 0])
```
%% Cell type:code id: tags:
``` python
# Allocate memory for fit results
fit_result = {}
keys = list(minuit.fitarg.keys())
keys = [x for x in keys if 'limit_' not in x and 'fix_' not in x]
keys += ['chi2_ndof', 'mask', 'gain']
for key in keys:
    dtype = 'f4'
    if key == 'mask':
        dtype = 'i4'
    fit_result[key] = sharedmem.empty([n_cells, n_pixels_x, n_pixels_y], dtype=dtype)
```
%% Cell type:markdown id: tags:
## Evaluated peak locations ##
The following plot shows the evaluated peak locations for each peak. Peak ids increase downward:
%% Cell type:code id: tags:
``` python
from mpl_toolkits.axes_grid1 import AxesGrid
cell_to_preview = 4
for qm, data in peaks_g.items():
print("The module shown is: {}".format(qm))
print("The cell shown is: {}".format(cell_to_preview))
print("Evaluated peaks: {}".format(data.shape[-1]))
fig = plt.figure(figsize=(15,15))
grid = AxesGrid(fig, 111,
nrows_ncols=(data.shape[-1], 1),
axes_pad=0.1,
share_all=True,
label_mode="L",
cbar_location="right",
cbar_mode="each",
cbar_size="7%",
cbar_pad="2%")
for j in range(data.shape[-1]):
d = data[...,cell_to_preview,j]
d[~np.isfinite(d)] = 0
im = grid[j].imshow(d, interpolation="nearest", vmin=0.9*np.nanmedian(d), vmax=max(1.1*np.nanmedian(d), 50))
cb = grid.cbar_axes[j].colorbar(im)
cb.set_label_text("Peak location (ADU)")
```
%% Cell type:code id: tags:
``` python
# Perform fitting
with Pool() as pool:
    const_out = pool.map(fit_batch, batches)
```
%% Cell type:markdown id: tags:
## Gain Slopes And Offsets ##
The gain slopes and offsets are deduced by fitting a linear function $y = mx + b$ to the evaluated peaks. Gains are normalized to all pixels and all memory cells of a module by using the average peak locations as $x$ values. Slopes centered around 1 are therefore to be expected.
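A minimal sketch (not part of the original notebook) of such a linear fit for a single pixel with `numpy.polyfit`; `mean_locations` and `peak_locations` are illustrative stand-ins for the module-averaged and per-pixel peak positions:
%% Cell type:code id: tags:
``` python
import numpy as np

# module-averaged peak positions (x) vs. one pixel's peak positions (y)
mean_locations = np.array([0.0, 55.0, 105.0, 155.0])   # x: average over module
peak_locations = np.array([1.2, 54.1, 103.9, 152.8])   # y: single pixel

m, b = np.polyfit(mean_locations, peak_locations, 1)
print(f"gain slope m = {m:.3f}, offset b = {b:.2f}")   # m ~ 1, b ~ 0 expected
```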
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
    ii = list(modules)[i]
    qm = "Q{}M{}".format(ii//4+1, ii%4+1)
    gain, pks, std, gfunc = r
    gains, errors, chisq, valid, max_dev, stats = gfunc
    _, entries, stds, sow = gain

    fig = plt.figure(figsize=(15,8))
    ax = fig.add_subplot(211)
    im = ax.imshow(gains[...,0], interpolation="nearest", vmin=0.85, vmax=1.15)
    cb = fig.colorbar(im)
    cb.set_label("Gain slope m")
    fig.savefig("{}/gain_m_mod{}.png".format(out_folder, qm))

    ax = fig.add_subplot(212)
    ax.hist(gains[...,0].flatten(), range=(0.5, 1.5), bins=100)
    ax.set_ylabel("Occurrences")
    ax.set_xlabel("Gain slope m")
    ax.semilogy()
```
%% Cell type:code id: tags:
``` python
# Evaluate bad pixels
fit_result['gain'] = (fit_result['g1mean'] - fit_result['g0mean'])/photon_energy

# Calculate histogram width and evaluate cut
h_sums = np.sum(hist_data['hist'], axis=1)
hist_norm = hist_data['hist'] / h_sums[:, None, :, :]
hist_mean = np.sum(hist_norm[:, :max_bins, ...] *
                   x[None, :, None, None], axis=1)
hist_sqr = (x[None, :, None, None] - hist_mean[:, None, ...])**2
hist_std = np.sqrt(np.sum(hist_norm[:, :max_bins, ...] * hist_sqr, axis=1))

fit_result['mask'][hist_std < intensity_lim] |= BadPixelsFF.NO_ENTRY.value

# Bad pixel on gain deviation
gains = np.copy(fit_result['gain'])
gains[fit_result['mask'] > 0] = np.nan
gain_mean = np.nanmean(gains, axis=(1,2))

fit_result['mask'][fit_result['gain'] > gain_mean[:,None,None]*gain_lim[1]] |= BadPixelsFF.GAIN_DEVIATION.value
fit_result['mask'][fit_result['gain'] < gain_mean[:,None,None]*gain_lim[0]] |= BadPixelsFF.GAIN_DEVIATION.value
```
%% Cell type:markdown id: tags:
The gain offsets $b$ are expected to be centered around 0 and minimal, as the data is already offset-subtracted.
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
    ii = list(modules)[i]
    qm = "Q{}M{}".format(ii//4+1, ii%4+1)
    gain, pks, std, gfunc = r
    gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(gains[...,1], interpolation="nearest", vmin=-25, vmax=25)
cb = fig.colorbar(im)
cb.set_label("Gain offset b")
fig.savefig("{}/gain_b_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(gains[...,1].flatten(), range=(-25, 25), bins=100)
ax.set_ylabel("Occurences")
ax.set_xlabel("Gain offset b")
ax.semilogy()
```
%% Cell type:code id: tags:
``` python
# Save fit results
os.makedirs(out_folder, exist_ok=True)
out_name = f'{out_folder}/fits_m{module:02d}.h5'
print(f'Save to file: {out_name}')
save_dict_to_hdf5({'data': fit_result}, out_name)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
Bad pixels are defined as pixels matching any of the following criteria (see the bit-flag sketch after this list):
* the gain evaluation did not converge, or the location of the noise peak deviates by more than the deviation threshold from the median location.
* the number of entries for the noise peak is 0.
* the standard deviation of the number of entries is larger than 5 times the standard deviation for the ASIC the pixel is on.
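Each cause is stored as an independent bit in one integer mask, so a pixel can carry several flags at once. A minimal sketch of the pattern (the bit values here are examples; the real ones live in `cal_tools.enums.BadPixels`):
%% Cell type:code id: tags:
``` python
import numpy as np

FF_GAIN_EVAL_ERROR = 2 ** 14   # example bit values, for illustration only
FF_GAIN_DEVIATION = 2 ** 15
FF_NO_ENTRIES = 2 ** 16

mask = np.zeros((4, 4), np.uint32)
mask[0, 0] |= FF_GAIN_EVAL_ERROR            # set one cause ...
mask[0, 0] |= FF_NO_ENTRIES                 # ... and another on the same pixel

has_no_entries = (mask & FF_NO_ENTRIES) > 0  # test for a single cause
print(mask[0, 0], has_no_entries[0, 0])
```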
%% Cell type:code id: tags:
``` python
print("The deviation threshold is: {}".format(deviation_threshold)) labels = ['Noise peak [ADU]',
'First photon peak [ADU]',
f"gain [ADU/keV], $\gamma$={photon_energy} [keV]",
"$\chi^2$/nDOF",
'Fraction of bad pixels']
for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']):
fig = plt.figure(figsize=(20,5))
ax = fig.add_subplot(121)
data = fit_result[key]
if key == 'mask':
data = data>0
vmin, vmax = [0, 1]
else:
vmin, vmax = get_range(data, 5)
_ = heatmapPlot(np.mean(data, axis=0).T,
add_panels=False, cmap='viridis', use_axis=ax,
vmin=vmin, vmax=vmax, lut_label=labels[i] )
if key != 'mask':
vmin, vmax = get_range(data, 7)
ax1 = fig.add_subplot(122)
_ = xana.histPlot(ax1,data.flatten(),
bins=45,range=[vmin, vmax],
log=True,color='red',histtype='stepfilled')
plt.xlabel(labels[i])
plt.ylabel("Counts")
``` ```
%% Cell type:code id: tags: %% Cell type:markdown id: tags:
``` python ## histograms of fit parameters ##
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
mask_db = mask_g[qm]
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(np.log2(mask_db[...,cell_to_preview]), interpolation="nearest", vmin=0, vmax=32)
cb = fig.colorbar(im)
cb.set_label("Mask value")
fig.savefig("{}/mask_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(np.log2(mask_db.flatten()), range=(0, 32), bins=32, normed=True)
ax.set_ylabel("Occurences")
ax.set_xlabel("Mask value")
ax.semilogy()
```
%% Cell type:code id: tags:
``` python
cols = {BadPixels.FF_GAIN_EVAL_ERROR.value: (BadPixels.FF_GAIN_EVAL_ERROR.name, '#FF000080'),
        BadPixels.FF_GAIN_DEVIATION.value: (BadPixels.FF_GAIN_DEVIATION.name, '#0000FF80'),
        BadPixels.FF_NO_ENTRIES.value: (BadPixels.FF_NO_ENTRIES.name, '#00FF0080'),
        BadPixels.FF_GAIN_EVAL_ERROR.value |
        BadPixels.FF_GAIN_DEVIATION.value: ('EVAL+DEV', '#DD00DD80'),
        BadPixels.FF_GAIN_EVAL_ERROR.value |
        BadPixels.FF_NO_ENTRIES.value: ('EVAL+NO_ENTRY', '#00DDDD80'),
        BadPixels.FF_GAIN_DEVIATION.value |
        BadPixels.FF_NO_ENTRIES.value: ('DEV+NO_ENTRY', '#DDDD0080'),
        }
rebin = 32 if not high_res_badpix_3d else 2
gain = 0
for mod, data in mask_g.items():
plot_badpix_3d(data, cols, title=mod, rebin_fac=rebin)
```
%% Cell type:markdown id: tags:
## Histograms of fit parameters ##
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 5))
ax0 = fig.add_subplot(111)
a = ax0.hist(hist_std.flatten(), bins=100, range=(0, 100))
ax0.plot([intensity_lim, intensity_lim], [0, np.nanmax(a[0])], linewidth=1.5, color='red')
ax0.set_xlabel('Histogram width [ADU]', fontsize=14)
ax0.set_ylabel('Number of histograms', fontsize=14)
ax0.set_title(f'{hist_std[hist_std<intensity_lim].shape[0]} histograms below threshold in {intensity_lim} ADU',
              fontsize=14, fontweight='bold')
ax0.grid()
plt.yscale('log')
```
%% Cell type:code id: tags:
``` python
if local_output:
    ofile = "{}/agipd_gain_store_{}_modules_{}.h5".format(out_folder,
                                                          "_".join([str(r) for r in runs]),
                                                          "_".join([str(m) for m in modules]))
    store_file = h5py.File(ofile, "w")
    for i, r in enumerate(res_gain):
        ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
gainmap, entires, stds, sow = gain
store_file["/{}/Gain/0/data".format(qm)] = gains[...,0]
store_file["/{}/GainOffset/0/data".format(qm)] = gains[...,1]
store_file["/{}/Flat/0/data".format(qm)] = flatsff[qm]
store_file["/{}/Entries/0/data".format(qm)] = entires
store_file["/{}/BadPixels/0/data".format(qm)] = mask_g[qm]
store_file.close()
```
%% Cell type:code id: tags:
``` python
num_bins = int(frange[1] - frange[0])
fig = plt.figure(figsize=(16, 5))
ax = fig.add_subplot(131)
_ = xana.histPlot(ax, fit_result['g1mean'].flatten(),
                  bins=num_bins, range=[frange[0], frange[1]],
                  log=True, color='red', histtype='stepfilled')
plt.xlabel("g1 mean [ADU]")

ax1 = fig.add_subplot(132)
_ = xana.histPlot(ax1, fit_result['g2mean'].flatten(),
                  bins=num_bins, range=[frange[0], frange[1]],
                  log=True, color='red', histtype='stepfilled')
plt.xlabel("g2 mean [ADU]")

ax2 = fig.add_subplot(133)
_ = xana.histPlot(ax2, fit_result['g3mean'].flatten(),
                  bins=num_bins, range=[frange[0], frange[1]],
                  log=True, color='red', histtype='stepfilled')
_ = plt.xlabel("g3 mean [ADU]")
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = proposal + ' ' + ' '.join(list(map(str, runs)))
```
%% Cell type:code id: tags:
``` python
if db_output:
    for i, r in enumerate(res_gain):
        ii = list(modules)[i]
        qm = "Q{}M{}".format(ii//4+1, ii%4+1)

        gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
gainmap, entires, stds, sow = gain
det = getattr(Detectors, dinstance)
device = getattr(det, qm)
# gains related
metadata = ConstantMetaData()
cgain = Constants.AGIPD.SlopesFF()
cgain.data = gains
metadata.calibration_constant = cgain
# set the operating condition
condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
        metadata.calibration_constant_version.raw_data_location = file_loc
        metadata.send(cal_db_interface, timeout=300000)

        # bad pixels
        metadata = ConstantMetaData()
        bp = Constants.AGIPD.BadPixelsFF()
        bp.data = mask_g[qm]
metadata.calibration_constant = bp
# set the operating condition
condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
metadata.send(cal_db_interface, timeout=300000)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(16, 5))
ax = fig.add_subplot(131)
_ = xana.histPlot(ax, fit_result['g0sigma'].flatten(),
                  bins=45, range=[0, 50],
                  log=True, color='red', histtype='stepfilled')
plt.xlabel("g0 sigma [ADU]")

ax1 = fig.add_subplot(132)
_ = xana.histPlot(ax1, fit_result['g1sigma'].flatten(),
                  bins=45, range=[0, 50],
                  log=True, color='red', histtype='stepfilled')
plt.xlabel("g1 sigma [ADU]")

ax2 = fig.add_subplot(133)
_ = xana.histPlot(ax2, fit_result['g2sigma'].flatten(),
                  bins=45, range=[0, 50],
                  log=True, color='red', histtype='stepfilled')
_ = plt.xlabel("g2 sigma [ADU]")
```
%% Cell type:markdown id: tags:
## Sanity check ##
Finally, we perform a correction of the data used to derive the gain constants with said constants. We expect an improvement in peak FWHM if the constants have been deduced correctly.
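For reference, the FWHM quoted below follows from the fitted Gaussian width via $\mathrm{FWHM} = 2\sqrt{2\ln 2}\,\sigma \approx 2.355\,\sigma$ (the code uses the rounded factor 2.35); a one-line check:
%% Cell type:code id: tags:
``` python
import numpy as np

sigma = 10.0
fwhm = 2 * np.sqrt(2 * np.log(2)) * sigma  # ~23.55 ADU for sigma = 10 ADU
print(fwhm)
```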
%% Cell type:code id: tags:
``` python
def hist_single_module(fbase, runs, sequences, il_mode, profile, limit_trains, memory_cells, rawversion, instrument, inp):
channel, offset, thresholds, relgain, noise, pc = inp
gain, pks, std, gfunc = relgain
gains, errors, chisq, valid, max_dev, stats = gfunc
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
import copy
from XFELDetAna.util import env
env.iprofile = profile
sensor_size = [128, 512]
block_size = sensor_size
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
        This is done via a continuous wavelet transform (CWT), using a Ricker
        wavelet.
        Only high gain data is evaluated, and data larger than 50 ADU,
        equivalent of roughly a 9 keV photon, is ignored.
        Two passes are executed: the first shift is accurate to 10 ADU and
        will catch baseline shifts smaller than -2000 ADU. CWT is evaluated
        for peaks of widths one, three and five times the noise.
        The baseline is then shifted by the position of the summed CWT maximum.
        In a second pass the histogram is evaluated within a range of
        +- 5*noise of the initial shift value, for a peak of width noise.
        Baseline shifts larger than the maximum threshold or positive shifts
        larger than 10 are discarded and the original data is returned; otherwise
the shift corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
        try:
            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
except:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large a shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
def read_fun(filename, channel):
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size, offset, nCells=memory_cells, blockSize=block_size, gains=[0,1,2])
offset_cor.debug()
hist_calc = xcal.HistogramCalculator(sensor_size, bins=2000, range=(0, 2000), blockSize=block_size)
hist_calc.debug()
hist_calc_uncorr = xcal.HistogramCalculator(sensor_size, bins=2000, range=(0, 2000), blockSize=block_size)
hist_calc_uncorr.debug()
for run in runs:
for seq in sequences[:2]:
            fname = fbase.format(run, run.upper(), channel, seq)
            d, ga, c = read_fun(fname, channel)

            # we need to do proper gain thresholding
            vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
            c = c[vidx]
            d = d[:,:,vidx]
ga = ga[:,:,vidx]
            g = np.zeros(ga.shape, np.uint8)
            g[...] = 2

            g[ga < thresholds[...,c,1]] = 1
            g[ga < thresholds[...,c,0]] = 0
            d = offset_cor.correct(d, cellTable=c, gainMap=g)
            for i in range(d.shape[2]):
                mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
            hist_calc_uncorr.fill(d)
d = (d-gains[..., 1][...,None])/gains[..., 0][...,None]
#d = d/gains[..., 0][...,None]
hist_calc.fill(d)
    h, e, c, _ = hist_calc.get()
hu = hist_calc_uncorr.get()
return h, e, c, hu[0]
inp = []
idx = 0
for i in modules:
    qm = "Q{}M{}".format(i//4+1, i%4+1)
    inp.append((i, offset_g[qm], thresholds_g[qm], res_gain[idx], noise_g[qm][...,0], pc_g[qm][0,...]))
    idx += 1

p = partial(hist_single_module, fbase, runs, sequences, IL_MODE, profile, limit_trains, memory_cells, rawversion, instrument)
#res = view.map_sync(p, inp)
res = list(map(p, inp))
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
Mean and median values are calculated across all pixels for each memory cell.
%% Cell type:code id: tags:
``` python
def plot_error_band(key, x, ax):
    cdata = np.copy(fit_result[key])
    cdata[fit_result['mask'] > 0] = np.nan

    mean = np.nanmean(cdata, axis=(1,2))
    median = np.nanmedian(cdata, axis=(1,2))
    std = np.nanstd(cdata, axis=(1,2))
    mad = np.nanmedian(np.abs(cdata - median[:,None,None]), axis=(1,2))

    ax0 = fig.add_subplot(111)
    ax0.plot(x, mean, 'k', color='#3F7F4C', label=" mean value ")
    ax0.plot(x, median, 'o', color='red', label=" median value ")
    ax0.fill_between(x, mean-std, mean+std,
                     alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
                     linewidth=1, linestyle='dashdot', antialiased=True,
                     label=" mean value $ \pm $ std ")
    ax0.fill_between(x, median-mad, median+mad,
                     alpha=0.3, edgecolor='red', facecolor='red',
                     linewidth=1, linestyle='dashdot', antialiased=True,
                     label=" median value $ \pm $ mad ")

    if f'error_{key}' in fit_result:
        cerr = np.copy(fit_result[f'error_{key}'])
        cerr[fit_result['mask'] > 0] = np.nan
        meanerr = np.nanmean(cerr, axis=(1,2))
        ax0.fill_between(x, mean-meanerr, mean+meanerr,
                         alpha=0.6, edgecolor='#089FFF', facecolor='#089FFF',
                         linewidth=1, linestyle='dashdot', antialiased=True,
                         label=" mean fit error ")

x = np.linspace(*cell_range, n_cells)

for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof']):
    fig = plt.figure(figsize=(10, 5))
    ax0 = fig.add_subplot(111)
    plot_error_band(key, x, ax0)

    ax0.set_xlabel('Memory Cell ID', fontsize=14)
    ax0.set_ylabel(labels[i], fontsize=14)
    ax0.grid()
    _ = ax0.legend()
```
%% Cell type:code id: tags:
``` python
from iminuit import Minuit
from iminuit.util import make_func_code, describe
from IPython.display import HTML, display
import tabulate
# fitting
par_ests = {}
par_ests["mu0"] = 0
par_ests["mu1"] = 50
par_ests["mu2"] = 100
par_ests["limit_mu0"] = [-5, 5]
par_ests["limit_mu1"] = [35, 65]
par_ests["limit_mu2"] = [100, 150]
par_ests["s0"] = 10
par_ests["s1"] = 10
par_ests["s2"] = 10
par_ests["limit_A0"] = [1e5, 1e12]
par_ests["limit_A1"] = [1e4, 1e10]
par_ests["limit_A2"] = [1e3, 1e10]
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 1
def gaussian3(x, mu0, s0, A0, mu1, s1, A1, mu2, s2, A2):
return (A0/np.sqrt(2*np.pi*s0**2)*np.exp(-0.5*((x-mu0)/s0)**2) +
A1/np.sqrt(2*np.pi*s1**2)*np.exp(-0.5*((x-mu1)/s1)**2) +
A2/np.sqrt(2*np.pi*s2**2)*np.exp(-0.5*((x-mu2)/s2)**2))
f_sig = describe(gaussian3)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x
self.y = y
self.err = err
f_sig = describe(f)
# this is how you fake function
# signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
d = []
y_range_max = 0
table = []
headers = ['Module',
'FWHM (cor.) [ADU]', 'Separation (cor.) [$\sigma$]',
'FWHM (uncor.) [ADU]', 'Separation (uncor.) [$\sigma$]',
'Improvement'
]
for i, r in enumerate(res):
qm = "Q{}M{}".format(i//4+1, i%4+1)
row = []
row.append(qm)
h, e, c, hu = r
d.append({
'x': c,
'y': h,
'drawstyle': 'steps-mid',
'label': '{}: corrected'.format(qm),
'marker': '.',
'color': 'blue',
})
idx = (h > 0) & np.isfinite(h)
h_non_zero = h[idx]
c_non_zero = c[idx]
par_ests["A0"] = np.float(h[np.argmin(abs(c-0))])
par_ests["A1"] = np.float(h[np.argmin(abs(c-50))])
par_ests["A2"] = np.float(h[np.argmin(abs(c-100))])
wrapped = _Chi2Functor(gaussian3, c_non_zero, h_non_zero,
np.sqrt(h_non_zero))
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
xt = np.arange(0, 200)
yt = gaussian3(xt, m.values['mu0'], m.values['s0'], m.values['A0'],
m.values['mu1'], m.values['s1'], m.values['A1'],
m.values['mu2'], m.values['s2'], m.values['A2'])
d.append({
'x': xt,
'y': yt,
'label': '{}: corrected (fit)'.format(qm),
'color': 'green',
'drawstyle': 'line',
'linewidth': 2
})
d.append({
'x': c,
'y': hu,
'drawstyle': 'steps-mid',
'label': '{}: uncorrected'.format(qm),
'marker': '.',
'color': 'grey',
'alpha': 0.5
})
row += [m.values['s1']*2.35, (m.values['mu1']-m.values['mu0'])/m.values['s1']]
idx = (hu > 0) & np.isfinite(hu)
h_non_zero = hu[idx]
c_non_zero = c[idx]
wrapped = _Chi2Functor(gaussian3, c_non_zero, h_non_zero,
np.sqrt(h_non_zero))
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
xt = np.arange(0, 200)
yt = gaussian3(xt, m.values['mu0'], m.values['s0'], m.values['A0'],
m.values['mu1'], m.values['s1'], m.values['A1'],
m.values['mu2'], m.values['s2'], m.values['A2'])
d.append({
'x': xt,
'y': yt,
'label': '{}: uncorrected (fit)'.format(qm),
'color': 'red',
'linewidth': 2
})
row += [m.values['s1']*2.35, (m.values['mu1']-m.values['mu0'])/m.values['s1']]
row.append("{:0.2f} %".format(100*(row[3]/row[1]-1)))
y_range_max = max(y_range_max, np.max(h[c>25])*1.5)
# output table
table.append(row)
fig = xana.simplePlot(d, y_log=False, figsize="2col",
aspect=2,
x_range=(0, 200),
legend='top-right-frame',
y_range=(0, y_range_max),
x_label='Energy (ADU)',
y_label='Counts')
display(HTML(tabulate.tabulate(table, tablefmt='html', headers=headers,
numalign="right", floatfmt="0.2f")))
```
%% Cell type:markdown id: tags:
## Cut flow ##
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
n_bars = 8
x = np.arange(n_bars)
width = 0.3
msk = fit_result['mask']
n_fits = np.prod(msk.shape)
y = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value| BadPixelsFF.GAIN_DEVIATION.value)
]
y2 = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.GAIN_DEVIATION.value)
]
y = (1 - np.sum(y, axis=(1,2,3))/n_fits)*100
y2 = (1 - np.sum(y2, axis=(1,2,3))/n_fits)*100
labels = ['Fit fails',
'Accurate covar',
'Chi2/nDOF',
'Gain',
'Noise peak',
'Peak width',
'No Entry',
'Gain deviation']
ax.bar(x, y2, width, label='Only this cut')
ax.bar(x, y, width, label='Cut flow')
plt.xticks(x, labels, rotation=90)
plt.ylim(y[5]-0.5,100)
plt.grid(True)
plt.legend()
plt.show()
```
%% Cell type:markdown id: tags:
# Gain Characterization Summary #
%% Cell type:code id: tags:
``` python
in_folder = "" # in this notebook, in_folder is not used as the data source is in the destination folder
out_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v02" # the folder to output to, required
hist_file_template = "hists_m{:02d}_sum.h5"
modules = [10] # modules to correct, set to -1 for all, range allowed
image_data_path = ""
run = 449 # runs of image data used to create histograms
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
local_output = True # output constants locally
db_output = False # output constants to database
# Fit parameters
peak_range = [-30,30,35,65,80,130,145,200] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
# Bad-pixel thresholds
d0_lim = [10, 70] # hard limits for d0 value (distance between noise and first peak)
peak_width_lim = [0.97, 1.43, 1.03, 1.57] # hard limits on the peak widths, [a0, b0, a1, b1, ...] in units of the noise peak. 4 parameters.
chi2_lim = [0,3.0] # Hard limit on chi2/nDOF value
cell_range = [1,5] # range of cell to be considered, [0,0] for all
pixel_range = [0,0,512,128] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
max_bins = 250 # Maximum number of bins to consider
batch_size = [1,8,8] # batch size: [cell,x,y]
n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
fix_peaks = True # Fix distance between photon peaks
# Detector conditions
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
```
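%% Cell type:markdown id: tags:
For reference, flat lists such as `peak_range` and `peak_width_range` encode one `(low, high)` window per fitted peak; a minimal sketch of how such a list unpacks:
%% Cell type:code id: tags:
``` python
peak_range = [-30, 30, 35, 65, 80, 130, 145, 200]
windows = list(zip(peak_range[::2], peak_range[1::2]))
print(windows)  # [(-30, 30), (35, 65), (80, 130), (145, 200)] -> one window per peak
```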
%% Cell type:code id: tags:
``` python
import glob
from multiprocessing import Pool
import re
import os
import traceback
import warnings
from dateutil import parser
from extra_data import RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry
import h5py
from iCalibrationDB import Detectors, Conditions, Constants
from iminuit import Minuit
from IPython.display import HTML, display, Markdown, Latex
import matplotlib.pyplot as plt
import numpy as np
import tabulate
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
from cal_tools.ana_tools import get_range, save_dict_to_hdf5
from cal_tools.agipdlib import get_acq_rate, get_bias_voltage, get_gain_setting, get_num_cells
from cal_tools.agipdutils_ff import any_in, fit_n_peaks, gaussian_sum, get_starting_parameters
from cal_tools.tools import get_dir_creation_date, send_to_db
from cal_tools.enums import BadPixels, BadPixelsFF
%matplotlib inline
warnings.filterwarnings('ignore')
```
%% Cell type:code id: tags:
``` python
# Get operation conditions
filename = glob.glob(f"{image_data_path}/r{run:04d}/*-AGIPD[0-1][0-9]-*")[0]
channel = int(re.findall(r".*-AGIPD([0-9]+)-.*", filename)[0])
control_fname = f'{image_data_path}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
# Evaluate number of memory cells
mem_cells = get_num_cells(filename, karabo_id, channel)
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
# Evaluate acquisition rate
if acq_rate == 0.:
acq_rate = get_acq_rate((filename, karabo_id, channel))
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(image_data_path, run)
# Evaluate gain setting
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Set gain settion to 0")
gain_setting = 0
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
else:
dinstance = "AGIPD1M2"
print(f"Using {creation_time} as creation time")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Photon Energy: {photon_energy}\n")
```
%% Cell type:code id: tags:
``` python
# Load constants for all modules
keys = ['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']
fit_data = {}
labels = {'g0mean': 'Noise peak position [ADU]',
'g1mean': 'First photon peak [ADU]',
'gain': f"Gain [ADU/keV], $\gamma$={photon_energy} [keV]",
'chi2_ndof': '$\chi^2$/nDOF',
'mask': 'Fraction of bad pixels over cells' }
modules = []
for mod in range(16):
qm = f"Q{mod // 4 + 1}M{mod % 4 + 1}"
fit_data[mod] = {}
try:
hf = h5py.File(f'{out_folder}/fits_m{mod:02d}.h5', 'r')
shape = hf['data/g0mean'].shape
for key in keys:
fit_data[mod][key] = hf[f'data/{key}'][()]
print(f"{in_folder}/{hist_file_template.format(mod)}")
modules.append(mod)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
print(f"No fit data available for module {qm}")
```
%% Cell type:code id: tags:
``` python
# Calculate SlopesFF and BadPixels to be send to DB
bpmask = {}
slopesFF = {}
for mod in modules:
bpmask[mod] = np.zeros(fit_data[mod]['mask'].shape).astype(np.int32)
bpmask[mod][ any_in(fit_data[mod]['mask'], BadPixelsFF.NO_ENTRY.value) ] = BadPixels.FF_NO_ENTRIES.value
bpmask[mod][ any_in(fit_data[mod]['mask'],
BadPixelsFF.GAIN_DEVIATION.value) ] |= BadPixels.FF_GAIN_DEVIATION.value
bpmask[mod][ any_in(fit_data[mod]['mask'],
BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value) ] |= BadPixels.FF_GAIN_EVAL_ERROR.value
# Set value for bad pixel to average across pixels for a given module
slopesFF[mod] = np.copy(fit_data[mod]['gain'])
slopesFF[mod][fit_data[mod]['mask']>0] = np.nan
gain_mean = np.nanmean(slopesFF[mod], axis=(1,2))
for i in range(slopesFF[mod].shape[0]):
slopesFF[mod][i][ fit_data[mod]['mask'][i] > 0 ] = gain_mean[i]
```
%% Cell type:code id: tags:
``` python
# Send constants to DB
def send_const(mod):
try:
device = getattr(getattr(Detectors, dinstance), f'Q{mod // 4 + 1}M{mod % 4 + 1}')
# gain
constant = Constants.AGIPD.SlopesFF()
constant.data = np.moveaxis(np.moveaxis(slopesFF[mod],0,2),0,1)
send_to_db(device, constant, condition, file_loc,
cal_db_interface, creation_time,
verbosity=1, timeout=cal_db_timeout)
# bad pixels
constant_bp = Constants.AGIPD.BadPixelsFF()
constant_bp.data = np.moveaxis(np.moveaxis(bpmask[mod],0,2),0,1)
send_to_db(device, constant_bp, condition, file_loc,
cal_db_interface, creation_time,
verbosity=1, timeout=cal_db_timeout)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
# set the operating condition
condition = Conditions.Illuminated.AGIPD(mem_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acq_rate, gain_setting=gain_setting)
# Check if we have the shape we expect
if db_output:
if slopesFF[modules[0]].shape == (mem_cells, 512, 128):
with Pool(processes=len(modules)) as pool:
const_out = pool.map(send_const, modules)
else:
print(f"Constants are not sent to the DB because of the shape mismatsh")
print(f"Expected {(mem_cells, 512, 128)}, observed {slopesFF[modules[0]].shape}")
condition_dict ={}
for entry in condition.to_dict()['parameters']:
key = entry.pop('parameter_name')
del entry['description']
del entry['flg_available']
condition_dict[key] = entry
# Create the same file structure as database constants files, in which
# each constant type has its corresponding condition and data.
if local_output:
for mod in modules:
qm = f"Q{mod // 4 + 1}M{mod % 4 + 1}"
device = getattr(getattr(Detectors, dinstance), qm).device_name
file = f"{out_folder}/slopesff_bpmask_module_{qm}.h5"
dic = {
device:{
'SlopesFF': {
0:{
'condition': condition_dict,
'data': np.moveaxis(np.moveaxis(slopesFF[mod],0,2),0,1)}
},
'BadPixelsFF':{
0:{
'condition': condition_dict,
'data': np.moveaxis(np.moveaxis(bpmask[mod],0,2),0,1)}
},
}
}
save_dict_to_hdf5(dic, file)
```
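%% Cell type:markdown id: tags:
A minimal sketch (not part of the original notebook) of reading one of these files back; the HDF5 group layout mirrors the nested dictionary passed to `save_dict_to_hdf5` above, and the module name `Q1M1` is only an example:
%% Cell type:code id: tags:
``` python
import h5py

# list all groups/datasets, e.g. <device_name>/SlopesFF/0/data
with h5py.File(f"{out_folder}/slopesff_bpmask_module_Q1M1.h5", "r") as f:
    f.visit(print)
```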
%% Cell type:code id: tags:
``` python
# Define AGIPD geometry
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
# Create the arrays that will be used for figures.
# A dictionary contains all the data for each of the processing stages (gains, mean, slopesFF...).
# Each array corresponds to the data for all 16 modules.
# These are updated with their fit/slopes data in the following loops.
if cell_range==[0,0]:
cell_range[1] = shape[0]
const_data = {}
for key in keys:
const_data[key] = np.full((16,shape[0],512,128), np.nan)
for i in range(16):
if key in fit_data[i]:
const_data[key][i,:,pixel_range[0]:pixel_range[2],
pixel_range[1]:pixel_range[3]] = fit_data[i][key]
const_data['slopesFF'] = np.full((16,shape[0],512,128), np.nan)
labels['slopesFF'] = f'slopesFF [ADU/keV], $\gamma$={photon_energy} [keV]'
for i in range(16):
if i in slopesFF:
const_data['slopesFF'][i,:,pixel_range[0]:pixel_range[2],
pixel_range[1]:pixel_range[3]] = slopesFF[i]
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
%% Cell type:code id: tags:
``` python
for key in const_data.keys():
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111)
if key=='mask':
data = np.nanmean(const_data[key]>0, axis=1)
vmin, vmax = (0,1)
else:
data = np.nanmean(const_data[key], axis=1)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, figsize=(20,20))
_ = ax.set_title(labels[key])
```
%% Cell type:markdown id: tags:
## Summary across cells ##
Good pixels only.
%% Cell type:code id: tags:
``` python
for key in const_data.keys():
data = np.copy(const_data[key])
if key=='mask':
data = data>0
else:
data[const_data['mask']>0] = np.nan
d = []
for i in range(16):
d.append({'x': np.arange(data[i].shape[0]),
'y': np.nanmean(data[i], axis=(1,2)),
'drawstyle': 'steps-pre',
'label': f'{i}',
'linewidth': 2,
'linestyle': '--' if i>7 else '-'
})
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
_ = simplePlot(d, xrange=(-12, 510),
x_label='Memory Cell ID',
y_label=labels[key],
use_axis=ax,
legend='top-left-frame-ncol8',)
ylim = ax.get_ylim()
ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
```
%% Cell type:markdown id: tags:
## Summary table ##
%% Cell type:code id: tags:
``` python
table = []
for i in modules:
table.append((i,
f"{np.nanmean(slopesFF[i]):0.1f} +- {np.nanstd(slopesFF[i]):0.2f}",
f"{np.nanmean(bpmask[i]>0)*100:0.1f} ({np.nansum(bpmask[i]>0)})"
))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Module", "Gain [ADU/keV]", "Bad pixels [%(Count)]"])))
```
%% Cell type:markdown id: tags:
## Performance plots
%% Cell type:code id: tags:
``` python
def get_trains_data(run_folder, source, include, tid=None):
"""
    Load a single train for all modules
:param run_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param tid: Train Id to be loaded. First train is considered if None is given
"""
run_data = RunDirectory(run_folder, include)
if tid:
tid, data = run_data.select('*/DET/*', source).train_from_id(tid)
return tid, stack_detector_data(data, source)
else:
for tid, data in run_data.select('*/DET/*', source).trains(require_all=True):
return tid, stack_detector_data(data, source)
return None, None
include = '*S00000*'
tid, orig = get_trains_data(f'{image_data_path}/r{run:04d}/', 'image.data', include)
orig = orig[cell_range[0]:cell_range[1], ...]
```
%% Cell type:code id: tags:
``` python
# FIXME: mask bad pixels from median
# mask = const_data['BadPixelsFF']
corrections = const_data['slopesFF']  # shape (16, shape[0], 512, 128), with shape[0] = cell_range[1] - cell_range[0]
corrections = np.moveaxis(corrections,1,0) # (shape[0],16,512,128)
rel_corr = corrections/np.nanmedian(corrections)
corrected = orig / rel_corr
```
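%% Cell type:markdown id: tags:
The correction above is normalized to the global median slope, so the corrected data stays on the original ADU scale; only pixel-to-pixel and cell-to-cell gain variations are removed.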
%% Cell type:markdown id: tags:
### Mean value not corrected (train 0)
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111)
odata = np.nanmean(orig, axis=0)
vmin, vmax = get_range(odata, 5)
ax = geom.plot_data_fast(odata, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, figsize=(20,20))
_ = ax.set_title("Original data, mean across one train")
```
%% Cell type:markdown id: tags:
### Mean value corrected (train 0)
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111)
cdata = np.nanmean(corrected, axis=0)
vmin, vmax = get_range(cdata, 5)
ax = geom.plot_data_fast(cdata, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, figsize=(20,20))
_ = ax.set_title("Corrected data, mean across one train")
```
%% Cell type:markdown id: tags:
### Histogram of corrected and uncorrected spectrum (train 0)
%% Cell type:code id: tags:
``` python
######################################
# FIT PEAKS
######################################
limits = np.reshape(peak_range,(4,2))
x_range = [limits[0][0], limits[-1][-1]]
nb = x_range[1] - x_range[0]+1
sel = ~np.isnan(corrected)
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
y,xe, _ = ax.hist(corrected[sel].flatten(), bins=nb, range=x_range, label='corrected', alpha=0.5)
# get the bin centers from the bin edges
xc=xe[:-1]+(xe[1]-xe[0])/2
pars, _ = get_starting_parameters(xc, y, limits,4)
minuit = fit_n_peaks(xc, y, pars, x_range,fix_d01=False)
pc = minuit.args
yfc = multi_gauss(xc,4, *pc)
plt.plot(xc, yfc, label='corrected fit')
y,_, _ = ax.hist(orig[sel].flatten(), bins=nb, range=x_range, label='original',alpha=0.5)
minuit = fit_n_peaks(xc, y, pars, x_range,fix_d01=False)
po = minuit.args
yfo = multi_gauss(xc,4, *po)
plt.plot(xc, yfo, label='original fit')
plt.title(f"Signal spectrum, first train")
plt.xlabel('[ADU]')
plt.legend()
plt.yscale('log')
plt.show()
```
%% Cell type:code id: tags:
``` python
table = []
for i in range(4):
table.append((f"Sigma{i} ",
f"{po[2+3*i]:0.2f} ",
f"{pc[2+3*i]:0.2f} "))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Parameter", "Value (original data)", "Value (corrected data)"])))
```
%% Cell type:markdown id: tags:
# Gain Characterization (Flat Fields) #
The following code characterizes the gain of the AGIPD detector from flat field data, i.e. data with X-rays of defined intensity. This data should fulfill the following requirements:
* intensity should be such that single photon peaks are visible
* data for all modules should be present
* no shadowing should occur on any of the modules
* each pixel should have at minimum around 100 events per photon peak per memory cell
* if central beam edges are visible, they should not be significantly more intense
Characterization is done by a weighted average algorithm, which evaluates the peak locations for all pixels
and memory cells of a given module. These locations are then fitted to a linear function of the average peak
location in each module, such that it yields a relative gain correction.
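Expressed as a formula: for each pixel $p$ and memory cell $c$, the evaluated peak locations $y_{p,c,k}$ (peak index $k$) are fitted against the module-average locations $\bar{y}_k$ via $y_{p,c,k} = m_{p,c}\,\bar{y}_k + b_{p,c}$, and the slope $m_{p,c}$ serves as the relative gain of that pixel and cell.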
%% Cell type:code id: tags:
``` python
# the following lines should be adjusted depending on data
in_folder = '/gpfs/exfel/exp/MID/201931/p900091/raw/' # path to input data, required
modules = [3] # modules to work on, required, range allowed
out_folder = "/gpfs/exfel/exp/MID/201931/p900091/usr/FF/2.2" # path to output to, required
runs = [484, 485,] # runs to use, required, range allowed
sequences = [0,1,2,3]#,4,5,6,7,8] #,5,6,7,8,9,10] # sequences files to use, range allowed
cluster_profile = "noDB" # The ipcluster profile to use
local_output = True # output constants locally
db_output = False # output constants to database
bias_voltage = 300 # detector bias voltage
cal_db_interface = "tcp://max-exfl016:8026#8035" # the database interface to use
mem_cells = 0 # number of memory cells used
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
fit_hook = True # fit a hook function to medium gain slope
rawversion = 2 # RAW file format version
instrument = "MID"
photon_energy = 9.2 # the photon energy in keV
offset_store = "" # for file-baed access
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
db_input = True # retrieve data from calibration database, setting offset-store will overwrite this
deviation_threshold = 0.75 # peaks with an absolute location deviation larger than the median are considered bad
acqrate = 0. # acquisition rate
use_dir_creation_date = True
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00
gain_setting = 0.1 # gain setting can have value 0 or 1, Default=0.1 for no (None) gain-setting
karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control information
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
```
%% Cell type:code id: tags:
``` python
# std library imports
from datetime import datetime
import dateutil
from functools import partial
import warnings
warnings.filterwarnings('ignore')
import os
import h5py
# numpy and matplot lib specific
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
%matplotlib inline
# parallel processing via ipcluster
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
from ipyparallel import Client
client = Client(profile=cluster_profile)
view = client[:]
view.use_dill()
# pyDetLib imports
import XFELDetAna.xfelpycaltools as xcal
import XFELDetAna.xfelpyanatools as xana
from XFELDetAna.util import env
env.iprofile = cluster_profile
profile = cluster_profile
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
from cal_tools.agipdlib import get_num_cells, get_acq_rate, get_gain_setting
from cal_tools.enums import BadPixels
from cal_tools.influx import InfluxLogger
from cal_tools.plotting import show_overview, plot_badpix_3d
from cal_tools.tools import gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name, get_dir_creation_date, get_random_db_interface
```
%% Cell type:code id: tags:
``` python
# usually no need to change these lines
sensor_size = [128, 512]
block_size = [128, 512]
QUADRANTS = 4
MODULES_PER_QUAD = 4
DET_FILE_INSET = "AGIPD"
# Define constant creation time.
if creation_time:
try:
creation_time = datetime.strptime(creation_time, '%Y-%m-%d %H:%M:%S.%f')
except Exception as e:
print(f"creation_time value error: {e}."
"Use same format as YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00/n")
creation_time = None
print("Given creation time wont be used.")
else:
creation_time = None
if not creation_time and use_dir_creation_date:
ntries = 100
while ntries > 0:
try:
creation_time = get_dir_creation_date(in_folder, runs[0])
break
except OSError:
pass
ntries -= 1
print("Using {} as creation time".format(creation_time))
runs = parse_runs(runs)
if offset_store != "":
db_input = False
else:
db_input = True
limit_trains = 20
limit_trains_eval = None
print("Parameters are:")
print("Runs: {}".format(runs))
print("Modules: {}".format(modules))
print("Sequences: {}".format(sequences))
print("Using DB: {}".format(db_output))
if instrument == "SPB":
loc = "SPB_DET_AGIPD1M-1"
dinstance = "AGIPD1M1"
karabo_id_control = "SPB_IRU_AGIPD1M1"
else:
loc = "MID_DET_AGIPD1M-1"
dinstance = "AGIPD1M2"
karabo_id_control = "MID_EXP_AGIPD1M1"
cal_db_interface = get_random_db_interface(cal_db_interface)
# these lines can usually stay as is
fbase = "{}/{{}}/RAW-{{}}-AGIPD{{:02d}}-S{{:05d}}.h5".format(in_folder)
gains = np.arange(3)
run, prop, seq = run_prop_seq_from_path(in_folder)
logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
notebook=get_notebook_name(), proposal=prop)
# extract the memory cells and acquisition rate from
# the file of the first module, first sequence and first run
channel = 0
fname = fbase.format(runs[0], runs[0].upper(), channel, sequences[0])
if acqrate == 0.:
acqrate = get_acq_rate(fname, loc, channel)
if mem_cells == 0:
cells = get_num_cells(fname, loc, channel)
mem_cells = cells # avoid setting twice
IL_MODE = interlaced
max_cells = mem_cells if not interlaced else mem_cells*2
memory_cells = mem_cells
print("Interlaced mode: {}".format(IL_MODE))
cells = np.arange(max_cells)
print(f"Acquisition rate and memory cells set from file {fname} are "
f"{acqrate} MHz and {max_cells}, respectively")
```
%% Cell type:code id: tags:
``` python
control_fname = f'{in_folder}/{runs[0]}/RAW-{runs[0].upper()}-{karabo_da_control}-S00000.h5'
if "{" in h5path_ctrl:
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(control_fname, h5path_ctrl)
except Exception as e:
print(f'Error while reading gain setting from: \n{control_fname}')
print(e)
print("Gain setting is not found in the control information")
print("Data will not be processed")
sequences = []
print(f"Gain setting: {gain_setting}")
```
%% Cell type:markdown id: tags:
For the characterization, offset maps for each module are needed. They are read in either locally or from the XFEL calibration database.
%% Cell type:code id: tags:
``` python
from dateutil import parser
offset_g = {}
noise_g = {}
thresholds_g = {}
pc_g = {}
if not db_input:
print("Offset, noise and thresholds have been read in from: {}".format(offset_store))
store_file = h5py.File(offset_store, "r")
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
offset_g[qm] = np.array(store_file["{}/Offset/0/data".format(qm)])
noise_g[qm] = np.array(store_file["{}/Noise/0/data".format(qm)])
thresholds_g[qm] = np.array(store_file["{}/Threshold/0/data".format(qm)])
store_file.close()
else:
print("Offset, noise and thresholds have been read in from calibration database:")
first_ex = True
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
metadata = ConstantMetaData()
offset = Constants.AGIPD.Offset()
metadata.calibration_constant = offset
det = getattr(Detectors, dinstance)
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
offset_g[qm] = offset.data
if first_ex:
print("Offset map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
noise = Constants.AGIPD.Noise()
metadata.calibration_constant = noise
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
noise_g[qm] = noise.data
if first_ex:
print("Noise map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
thresholds = Constants.AGIPD.ThresholdsDark()
metadata.calibration_constant = thresholds
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
thresholds_g[qm] = thresholds.data
if first_ex:
print("Threshold map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
metadata = ConstantMetaData()
pc = Constants.AGIPD.SlopesPC()
metadata.calibration_constant = pc
# set the operating condition
condition = Conditions.Dark.AGIPD(memory_cells=max_cells, bias_voltage=bias_voltage,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=300000)
pc_g[qm] = np.nanmedian(pc.data[0,...])/pc.data
if first_ex:
print("PC map was injected on: {}".format(metadata.calibration_constant_version.begin_at))
first_ex = False
```
%% Cell type:markdown id: tags:
## Initial peak estimates ##
First, initial peak estimates need to be defined. This is done by inspecting histograms created from (a subset of) the offset-corrected data for peak locations and peak heights.
%% Cell type:code id: tags:
``` python
def hist_single_module(fbase, runs, sequences, sensor_size, memory_cells, block_size,
il_mode, limit_trains, profile, rawversion, instrument, inp):
""" This function calculates a per-pixel histogram for a single module
Runs and sequences give the data to calculate histogram from
"""
channel, offset, thresholds, pc, noise = inp
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
from XFELDetAna.util import env
env.iprofile = profile
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
        This is done via a continuous wavelet transform (CWT), using a Ricker
        wavelet.
        Only high gain data is evaluated, and data larger than 50 ADU,
        equivalent to roughly a 9 keV photon, is ignored.
        Two passes are executed: the first shift is accurate to 10 ADU and
        will catch baseline shifts smaller than -2000 ADU. CWT is evaluated
        for peaks of widths one, three and five times the noise.
        The baseline is then shifted by the position of the summed CWT maximum.
        In a second pass the histogram is evaluated within a range of
        +- 5*noise of the initial shift value, for a peak of width equal to the noise.
        Baseline shifts larger than the maximum threshold or positive shifts
        larger than 10 are discarded and the original data is returned; otherwise
        the shift-corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
except:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
    # function needs to be inline for parallel processing
def read_fun(filename, channel):
""" A reader function used by pyDetLib
"""
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size,
offset,
nCells=memory_cells,
blockSize=block_size,
gains=[0,1,2])
offset_cor.mapper = offset_cor._view.map_sync
offset_cor.debug() # force non-parallel processing since outer function will run concurrently
hist_calc = xcal.HistogramCalculator(sensor_size,
bins=4000,
range=(-4000, 8000),
blockSize=block_size)
hist_calc.mapper = hist_calc._view.map_sync
hist_calc.debug() # force non-parallel processing since outer function will run concurrently
for run in runs:
for seq in sequences:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
# we need to do proper gain thresholding
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
hist_calc.fill(d)
h, e, c, _ = hist_calc.get()
return h, e, c
inp = []
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], pc_g[qm][0,...], noise_g[qm][...,0]))
p = partial(hist_single_module, fbase, runs, sequences,
sensor_size, memory_cells, block_size, IL_MODE, limit_trains, profile, rawversion, instrument)
res_uncorr = list(map(p, inp))
```
%% Cell type:code id: tags:
``` python
d = []
qms = []
for i, r in enumerate(res_uncorr):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
qms.append(qm)
h, e, c = r
d.append({
'x': c,
'y': h,
'drawstyle': 'steps-mid'
})
fig = xana.simplePlot(d, y_log=False,
figsize="2col",
aspect=2,
x_range=(-50, 500),
x_label="Intensity (ADU)",
y_label="Counts")
fig.savefig("{}/FF_module_{}_peak_pos.png".format(out_folder, ",".join(qms)))
```
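%% Cell type:markdown id: tags:
The peak estimates in the next cell are read off from histograms like the one above. As a minimal sketch (not part of the original workflow), such estimates could also be obtained automatically with `scipy.signal.find_peaks`; the `distance` and `prominence` values below are illustrative assumptions.
%% Cell type:code id: tags:
``` python
# Hypothetical automated peak estimation from the histogram of the first
# module; res_uncorr is reused from the histogramming cell above.
import numpy as np
from scipy.signal import find_peaks

h, e, c = res_uncorr[0]
# require candidate peaks to be several bins apart and reasonably prominent
idx, _ = find_peaks(h, distance=10, prominence=0.01 * np.nanmax(h))
print("Candidate peak positions (ADU):", c[idx][:4])
print("Candidate peak heights:", h[idx][:4])
```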
%% Cell type:code id: tags:
``` python
# these should be quite stable
peak_estimates = [0, 55, 105, 155]
peak_heights = [5e7, 5e6, 1e6, 5e5]
peak_sigma = [5., 5.,5., 5.,]
print("Using the following peak estimates: {}".format(peak_estimates))
```
%% Cell type:markdown id: tags:
## Calculate relative gain per module ##
Using the estimates obtained above, we calculate the relative gain per module. For this we use the weighted average method implemented in pyDetLib.
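As a rough sketch of the idea (the exact weighting is internal to pyDetLib's `GainMapCalculator`): given fitted peak locations $y_k$ and weights $w_k$, e.g. the entries per peak, a per-pixel gain estimate follows from the weighted mean photon-peak spacing $g \propto \sum_k w_k\,(y_{k+1}-y_k)\,/\,\sum_k w_k$, which is then normalized across the module.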
%% Cell type:code id: tags:
``` python
block_size = [64, 64]
def relgain_single_module(fbase, runs, sequences, peak_estimates,
peak_heights, peak_sigma, memory_cells, sensor_size,
block_size, il_mode, profile, limit_trains_eval, rawversion, instrument, inp):
""" A function for calculated the relative gain of a single AGIPD module
"""
# import needed inline for parallel processing
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
from XFELDetAna.util import env
env.iprofile = profile
channel, offset, thresholds, noise, pc = inp
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
        This is done via a continuous wavelet transform (CWT), using a Ricker
        wavelet.
        Only high gain data is evaluated, and data larger than 50 ADU,
        equivalent to roughly a 9 keV photon, is ignored.
        Two passes are executed: the first shift is accurate to 10 ADU and
        will catch baseline shifts smaller than -2000 ADU. CWT is evaluated
        for peaks of widths one, three and five times the noise.
        The baseline is then shifted by the position of the summed CWT maximum.
        In a second pass the histogram is evaluated within a range of
        +- 5*noise of the initial shift value, for a peak of width equal to the noise.
        Baseline shifts larger than the maximum threshold or positive shifts
        larger than 10 are discarded and the original data is returned; otherwise
        the shift-corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
except:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
    # function needs to be inline for parallel processing
def read_fun(filename, channel):
""" A reader function used by pyDetLib
"""
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size, offset, nCells=memory_cells,
blockSize=block_size, gains=[0,1,2])
offset_cor.mapper = offset_cor._view.map_sync
rel_gain = xcal.GainMapCalculator(sensor_size,
peak_estimates,
peak_sigma,
nCells=memory_cells,
peakHeights = peak_heights,
noiseMap=noise,
deviationType="relative")
rel_gain.mapper = rel_gain._view.map_sync
for run in runs:
for seq in sequences:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
# we need to do proper gain thresholding
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
# we need to do proper gain thresholding
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
rel_gain.fill(d, cellTable=c)
gain_map = rel_gain.get()
gain_map_func = rel_gain.getUsingFunc(inverse=False)
pks, stds = rel_gain.getRawPeaks()
return gain_map, pks, stds, gain_map_func
inp = []
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], noise_g[qm][...,0], pc_g[qm][0,...]))
start = datetime.now()
p = partial(relgain_single_module, fbase, runs, sequences,
peak_estimates, peak_heights, peak_sigma, memory_cells,
sensor_size, block_size, IL_MODE, profile, limit_trains_eval, rawversion, instrument)
res_gain = list(map(p, inp))  # don't run concurrently, as the inner functions are parallelized
duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration)
logger.send()
```
%% Cell type:markdown id: tags:
Finally, we inspect the results by plotting the number of entries, peak locations and resulting gain maps for each peak. In the course of doing so, we identify bad pixels as those having either 0 entries for a peak or `nan` values for the peak location.
%% Cell type:code id: tags:
``` python
gain_m = {}
flatsff = {}
gainoff_g = {}
entries_g = {}
peaks_g = {}
mask_g = {}
cell_to_preview = 4
masks_eval_peaks = [1, 2]
global_eval_peaks = [1]
global_meds = {}
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
gain_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
gain_mdb = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
entries_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells, 5))
gainoff_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
mask_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells), np.uint32)
gainoff_g[qm] = gainoff_db
gain_m[qm] = gain_mdb
entries_g[qm] = entries_db
peaks_g[qm] = pks
    # create a mask for irregular pixels
# first bit set if first peak has nan entries
for pk in masks_eval_peaks:
mask_db[~(np.isfinite(gain_mdb) & np.isfinite(gainoff_db))] |= BadPixels.FF_GAIN_EVAL_ERROR.value
mask_db[(np.abs(1-gain_mdb/np.nanmedian(gain_mdb) > deviation_threshold) )] |= BadPixels.FF_GAIN_DEVIATION.value
# second bit set if entries are 0 for first peak
mask_db[entries[...,1] == 0] |= BadPixels.FF_NO_ENTRIES.value
    zero_oc = np.count_nonzero((entries > 0).astype(int), axis=3)
mask_db[zero_oc <= len(peak_estimates)-2] |= BadPixels.FF_NO_ENTRIES.value
# third bit set if entries of a given adc show significant noise
stds = []
for ii in range(8):
for jj in range(8):
stds.append(np.std(entries_db[ii*16:(ii+1)*16,jj*64+2:(jj+1)*64-2,:,1], axis=(0,1)))
avg_stds = np.median(np.array(stds), axis=0)
for ii in range(8):
for jj in range(8):
std = np.std(entries_db[ii*16:(ii+1)*16,jj*64+2:(jj+1)*64-2,:,1], axis=(0,1))
if np.any(std > 5*avg_stds):
                mask_db[ii*16:(ii+1)*16,jj*64:(jj+1)*64,std > 5*avg_stds] |= BadPixels.FF_GAIN_DEVIATION.value
mask_g[qm] = mask_db
flat = np.zeros((gains.shape[0], gains.shape[1], memory_cells, 3))
for j in range(2,len(peak_estimates)):
flat[...,j-2] = np.mean(entries[...,j]/np.mean(entries[...,j]))
flat = np.mean(flat, axis=3)
#flat_db = np.zeros((gains.shape[0], gains.shape[1], memory_cells))
#for j in range(2):
# flat_db[...,j::2] = flat
flatsff[qm] = flat
global_meds[qm] = np.nanmedian(pks[...,global_eval_peaks][np.max(mask_db, axis=2) != 0])
```
%% Cell type:markdown id: tags:
## Evaluated peak locations ##
The following plot shows the evaluated peak locations for each peak. Peak ids increase downward:
%% Cell type:code id: tags:
``` python
from mpl_toolkits.axes_grid1 import AxesGrid
cell_to_preview = 4
for qm, data in peaks_g.items():
print("The module shown is: {}".format(qm))
print("The cell shown is: {}".format(cell_to_preview))
print("Evaluated peaks: {}".format(data.shape[-1]))
fig = plt.figure(figsize=(15,15))
grid = AxesGrid(fig, 111,
nrows_ncols=(data.shape[-1], 1),
axes_pad=0.1,
share_all=True,
label_mode="L",
cbar_location="right",
cbar_mode="each",
cbar_size="7%",
cbar_pad="2%")
for j in range(data.shape[-1]):
d = data[...,cell_to_preview,j]
d[~np.isfinite(d)] = 0
im = grid[j].imshow(d, interpolation="nearest", vmin=0.9*np.nanmedian(d), vmax=max(1.1*np.nanmedian(d), 50))
cb = grid.cbar_axes[j].colorbar(im)
cb.set_label_text("Peak location (ADU)")
```
%% Cell type:markdown id: tags:
## Gain Slopes And Offsets ##
The gain slopes and offsets are deduced by fitting a linear function $y = mx + b$ to the evaluated peaks. Gains are normalized to all pixels and all memory cells of a module by using the average peak locations as $x$ values. Thus slopes centered around 1 are to be expected.
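The cell below is a minimal illustrative sketch of this fit for a single pixel and memory cell, reusing `peaks_g` and `cell_to_preview` from above; the actual fit is performed inside pyDetLib (`getUsingFunc`).
%% Cell type:code id: tags:
``` python
# Illustrative per-pixel linear fit y = m*x + b for pixel (0, 0); not part of
# the original workflow, which uses pyDetLib's fit results directly.
import numpy as np

qm = list(peaks_g.keys())[0]
y = peaks_g[qm][0, 0, cell_to_preview, :]                          # this pixel's peak locations
x = np.nanmean(peaks_g[qm][..., cell_to_preview, :], axis=(0, 1))  # module-average locations
good = np.isfinite(x) & np.isfinite(y)
m, b = np.polyfit(x[good], y[good], 1)
print(f"Pixel (0, 0), cell {cell_to_preview}: slope m = {m:.3f}, offset b = {b:.1f}")
```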
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(gains[...,0], interpolation="nearest", vmin=0.85, vmax=1.15)
cb = fig.colorbar(im)
cb.set_label("Gain slope m")
fig.savefig("{}/gain_m_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(gains[...,0].flatten(), range=(0.5, 1.5), bins=100)
ax.set_ylabel("Occurences")
ax.set_xlabel("Gain slope m")
ax.semilogy()
```
%% Cell type:markdown id: tags:
The gain offsets b are expected to be centered around 0 and minimal, as the data is already offset-subtracted.
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
_, entries, stds, sow = gain
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(gains[...,1], interpolation="nearest", vmin=-25, vmax=25)
cb = fig.colorbar(im)
cb.set_label("Gain offset b")
fig.savefig("{}/gain_b_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
ax.hist(gains[...,1].flatten(), range=(-25, 25), bins=100)
ax.set_ylabel("Occurences")
ax.set_xlabel("Gain offset b")
ax.semilogy()
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
Bad pixels are defined by any of the following criteria (an illustrative check follows the list):
* the gain evaluation did not converge, or the location of the noise peak deviates by more than the deviation threshold from the median location.
* the number of entries for the noise peak is 0.
* the standard deviation of the number of entries is larger than 5 times the standard deviation for the ASIC the pixel is on.
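As a quick illustrative check (reusing `mask_db`, the mask of the last module processed above), the per-criterion counts can be read off the mask bits:
%% Cell type:code id: tags:
``` python
# Count how many pixel/cell entries each criterion flagged; illustrative only.
import numpy as np
from cal_tools.enums import BadPixels  # already imported above

for flag in (BadPixels.FF_GAIN_EVAL_ERROR, BadPixels.FF_GAIN_DEVIATION,
             BadPixels.FF_NO_ENTRIES):
    n = np.count_nonzero(mask_db & flag.value)
    print(f"{flag.name}: {n} entries flagged")
```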
%% Cell type:code id: tags:
``` python
print("The deviation threshold is: {}".format(deviation_threshold))
```
%% Cell type:code id: tags:
``` python
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
mask_db = mask_g[qm]
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(211)
im = ax.imshow(np.log2(mask_db[...,cell_to_preview]), interpolation="nearest", vmin=0, vmax=32)
cb = fig.colorbar(im)
cb.set_label("Mask value")
fig.savefig("{}/mask_mod{}.png".format(out_folder, qm))
ax = fig.add_subplot(212)
    ax.hist(np.log2(mask_db.flatten()), range=(0, 32), bins=32, density=True)
ax.set_ylabel("Occurences")
ax.set_xlabel("Mask value")
ax.semilogy()
```
%% Cell type:code id: tags:
``` python
cols = {BadPixels.FF_GAIN_EVAL_ERROR.value: (BadPixels.FF_GAIN_EVAL_ERROR.name, '#FF000080'),
BadPixels.FF_GAIN_DEVIATION.value: (BadPixels.FF_GAIN_DEVIATION.name, '#0000FF80'),
BadPixels.FF_NO_ENTRIES.value: (BadPixels.FF_NO_ENTRIES.name, '#00FF0080'),
BadPixels.FF_GAIN_EVAL_ERROR.value |
BadPixels.FF_GAIN_DEVIATION.value: ('EVAL+DEV', '#DD00DD80'),
BadPixels.FF_GAIN_EVAL_ERROR.value |
BadPixels.FF_NO_ENTRIES.value: ('EVAL+NO_ENTRY', '#00DDDD80'),
BadPixels.FF_GAIN_DEVIATION.value |
BadPixels.FF_NO_ENTRIES.value: ('DEV+NO_ENTRY', '#DDDD0080'),
}
rebin = 32 if not high_res_badpix_3d else 2
gain = 0
for mod, data in mask_g.items():
plot_badpix_3d(data, cols, title=mod, rebin_fac=rebin)
```
%% Cell type:code id: tags:
``` python
if local_output:
ofile = "{}/agipd_gain_store_{}_modules_{}.h5".format(out_folder,
"_".join([str(r) for r in runs]),
"_".join([str(m) for m in modules]))
store_file = h5py.File(ofile, "w")
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
        gainmap, entries, stds, sow = gain
store_file["/{}/Gain/0/data".format(qm)] = gains[...,0]
store_file["/{}/GainOffset/0/data".format(qm)] = gains[...,1]
store_file["/{}/Flat/0/data".format(qm)] = flatsff[qm]
store_file["/{}/Entries/0/data".format(qm)] = entires
store_file["/{}/BadPixels/0/data".format(qm)] = mask_g[qm]
store_file.close()
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))
```
%% Cell type:code id: tags:
``` python
if db_output:
for i, r in enumerate(res_gain):
ii = list(modules)[i]
qm = "Q{}M{}".format(ii//4+1, ii%4+1)
gain, pks, std, gfunc = r
gains, errors, chisq, valid, max_dev, stats = gfunc
        gainmap, entries, stds, sow = gain
det = getattr(Detectors, dinstance)
device = getattr(det, qm)
# gains related
metadata = ConstantMetaData()
cgain = Constants.AGIPD.SlopesFF()
cgain.data = gains
metadata.calibration_constant = cgain
# set the operating condition
condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
metadata.send(cal_db_interface, timeout=300000)
# bad pixels
metadata = ConstantMetaData()
bp = Constants.AGIPD.BadPixelsFF()
bp.data = mask_g[qm]
metadata.calibration_constant = bp
# set the operating condition
condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acqrate, gain_setting=gain_setting)
metadata.detector_condition = condition
        # specify a version for this constant
if creation_time is None:
metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))
else:
metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)
metadata.calibration_constant_version.raw_data_location = file_loc
metadata.send(cal_db_interface, timeout=300000)
```
%% Cell type:markdown id: tags:
## Sanity check ##
Finally, we perform a correction of the data used to derive the gain constants with said constants. We expect an improvement in peak FWHM if the constants have been deduced correctly.
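For a Gaussian peak, the FWHM follows from the fitted width as $\mathrm{FWHM} = 2\sqrt{2\ln 2}\,\sigma \approx 2.35\,\sigma$; this conversion is used for the table below.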
%% Cell type:code id: tags:
``` python
def hist_single_module(fbase, runs, sequences, il_mode, profile, limit_trains, memory_cells, rawversion, instrument, inp):
channel, offset, thresholds, relgain, noise, pc = inp
gain, pks, std, gfunc = relgain
gains, errors, chisq, valid, max_dev, stats = gfunc
import XFELDetAna.xfelpycaltools as xcal
import numpy as np
import h5py
import copy
from XFELDetAna.util import env
env.iprofile = profile
sensor_size = [128, 512]
block_size = sensor_size
def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):
""" Correct baseline shifts by evaluating position of the noise peak
:param: d: the data to correct, should be a single image
:param noise: the mean noise for the cell id of `d`
:param g: gain array matching `d` array
Correction is done by identifying the left-most (significant) peak
in the histogram of `d` and shifting it to be centered on 0.
        This is done via a continuous wavelet transform (CWT), using a Ricker
        wavelet.
        Only high gain data is evaluated, and data larger than 50 ADU,
        equivalent to roughly a 9 keV photon, is ignored.
        Two passes are executed: the first shift is accurate to 10 ADU and
        will catch baseline shifts smaller than -2000 ADU. CWT is evaluated
        for peaks of widths one, three and five times the noise.
        The baseline is then shifted by the position of the summed CWT maximum.
        In a second pass the histogram is evaluated within a range of
        +- 5*noise of the initial shift value, for a peak of width equal to the noise.
        Baseline shifts larger than the maximum threshold or positive shifts
        larger than 10 are discarded and the original data is returned; otherwise
        the shift-corrected data is returned.
"""
import copy
from scipy.signal import cwt, ricker
# we work on a copy to be able to filter values by setting them to
# np.nan
dd = copy.copy(d)
dd[g != 0] = np.nan # only high gain data
dd[dd > 50] = np.nan # only noise data
h, e = np.histogram(dd, bins=210, range=(-2000, 100))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
except:
return d
cwtall = np.sum(cwtmatr, axis=0)
marg = np.argmax(cwtall)
pc = c[marg]
high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
dd[~high_res_range] = np.nan
h, e = np.histogram(dd, bins=200,
range=(pc - 5 * noise, pc + 5 * noise))
c = (e[1:] + e[:-1]) / 2
try:
cwtmatr = cwt(h, ricker, [noise, ])
except:
return d
marg = np.argmax(cwtmatr)
pc = c[marg]
        # too large shift to be easily discernible via noise
if pc > 10 or pc < -baseline_corr_noise_threshold:
return d
return d - pc
def read_fun(filename, channel):
infile = h5py.File(filename, "r", driver="core")
if rawversion == 2:
count = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
first = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
last_index = int(first[count != 0][-1]+count[count != 0][-1])
first_index = int(first[count != 0][0])
else:
status = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
if np.count_nonzero(status != 0) == 0:
return
last = np.squeeze(infile["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
if limit_trains is not None:
last_index = min(limit_trains*memory_cells+first_index, last_index)
im = np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, channel)][first_index:last_index,...])
carr = infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId".format(instrument, channel)][first_index:last_index]
cells = np.squeeze(np.array(carr))
infile.close()
if il_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
else:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
ga = np.rollaxis(ga, 2)
ga = np.rollaxis(ga, 2, 1)
return im, ga, cells
offset_cor = xcal.OffsetCorrection(sensor_size, offset, nCells=memory_cells, blockSize=block_size, gains=[0,1,2])
offset_cor.debug()
hist_calc = xcal.HistogramCalculator(sensor_size, bins=2000, range=(0, 2000), blockSize=block_size)
hist_calc.debug()
hist_calc_uncorr = xcal.HistogramCalculator(sensor_size, bins=2000, range=(0, 2000), blockSize=block_size)
hist_calc_uncorr.debug()
for run in runs:
for seq in sequences[:2]:
fname = fbase.format(run, run.upper(), channel, seq)
d, ga, c = read_fun(fname, channel)
# we need to do proper gain thresholding
vidx = (c < offset.shape[2]) & (c < thresholds.shape[2])
c = c[vidx]
d = d[:,:,vidx]
ga = ga[:,:,vidx]
g = np.zeros(ga.shape, np.uint8)
g[...] = 2
g[ga < thresholds[...,c,1]] = 1
g[ga < thresholds[...,c,0]] = 0
d = offset_cor.correct(d, cellTable=c, gainMap=g)
for i in range(d.shape[2]):
mn_noise = np.nanmean(noise[...,c[i]])
d[...,i] = baseline_correct_via_noise(d[...,i],
mn_noise,
g[..., i])
d *= np.moveaxis(pc[c,...], 0, 2)
hist_calc_uncorr.fill(d)
d = (d-gains[..., 1][...,None])/gains[..., 0][...,None]
#d = d/gains[..., 0][...,None]
hist_calc.fill(d)
h, e, c, _ = hist_calc.get()
hu = hist_calc_uncorr.get()
return h, e, c, hu[0]
inp = []
idx = 0
for i in modules:
qm = "Q{}M{}".format(i//4+1, i%4+1)
inp.append((i, offset_g[qm], thresholds_g[qm], res_gain[idx], noise_g[qm][...,0], pc_g[qm][0,...]))
idx += 1
p = partial(hist_single_module, fbase, runs, sequences, IL_MODE, profile, limit_trains, memory_cells, rawversion, instrument)
#res = view.map_sync(p, inp)
res = list(map(p, inp))
```
%% Cell type:code id: tags:
``` python
from iminuit import Minuit
from iminuit.util import make_func_code, describe
from IPython.display import HTML, display
import tabulate
# fitting
par_ests = {}
par_ests["mu0"] = 0
par_ests["mu1"] = 50
par_ests["mu2"] = 100
par_ests["limit_mu0"] = [-5, 5]
par_ests["limit_mu1"] = [35, 65]
par_ests["limit_mu2"] = [100, 150]
par_ests["s0"] = 10
par_ests["s1"] = 10
par_ests["s2"] = 10
par_ests["limit_A0"] = [1e5, 1e12]
par_ests["limit_A1"] = [1e4, 1e10]
par_ests["limit_A2"] = [1e3, 1e10]
par_ests["throw_nan"] = False
par_ests["pedantic"] = False
par_ests["print_level"] = 1
def gaussian3(x, mu0, s0, A0, mu1, s1, A1, mu2, s2, A2):
return (A0/np.sqrt(2*np.pi*s0**2)*np.exp(-0.5*((x-mu0)/s0)**2) +
A1/np.sqrt(2*np.pi*s1**2)*np.exp(-0.5*((x-mu1)/s1)**2) +
A2/np.sqrt(2*np.pi*s2**2)*np.exp(-0.5*((x-mu2)/s2)**2))
f_sig = describe(gaussian3)[1:]
class _Chi2Functor:
def __init__(self, f, x, y, err):
self.f = f
self.x = x
self.y = y
self.err = err
f_sig = describe(f)
        # this is how you fake a function
        # signature dynamically
self.func_code = make_func_code(
f_sig[1:]) # docking off independent variable
self.func_defaults = None # this keeps numpy.vectorize happy
def __call__(self, *arg):
# notice that it accept variable length
# positional arguments
# chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)
d = []
y_range_max = 0
table = []
headers = ['Module',
'FWHM (cor.) [ADU]', 'Separation (cor.) [$\sigma$]',
'FWHM (uncor.) [ADU]', 'Separation (uncor.) [$\sigma$]',
'Improvement'
]
for i, r in enumerate(res):
qm = "Q{}M{}".format(i//4+1, i%4+1)
row = []
row.append(qm)
h, e, c, hu = r
d.append({
'x': c,
'y': h,
'drawstyle': 'steps-mid',
'label': '{}: corrected'.format(qm),
'marker': '.',
'color': 'blue',
})
idx = (h > 0) & np.isfinite(h)
h_non_zero = h[idx]
c_non_zero = c[idx]
par_ests["A0"] = np.float(h[np.argmin(abs(c-0))])
par_ests["A1"] = np.float(h[np.argmin(abs(c-50))])
par_ests["A2"] = np.float(h[np.argmin(abs(c-100))])
wrapped = _Chi2Functor(gaussian3, c_non_zero, h_non_zero,
np.sqrt(h_non_zero))
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
xt = np.arange(0, 200)
yt = gaussian3(xt, m.values['mu0'], m.values['s0'], m.values['A0'],
m.values['mu1'], m.values['s1'], m.values['A1'],
m.values['mu2'], m.values['s2'], m.values['A2'])
d.append({
'x': xt,
'y': yt,
'label': '{}: corrected (fit)'.format(qm),
'color': 'green',
'drawstyle': 'line',
'linewidth': 2
})
d.append({
'x': c,
'y': hu,
'drawstyle': 'steps-mid',
'label': '{}: uncorrected'.format(qm),
'marker': '.',
'color': 'grey',
'alpha': 0.5
})
row += [m.values['s1']*2.35, (m.values['mu1']-m.values['mu0'])/m.values['s1']]
idx = (hu > 0) & np.isfinite(hu)
h_non_zero = hu[idx]
c_non_zero = c[idx]
wrapped = _Chi2Functor(gaussian3, c_non_zero, h_non_zero,
np.sqrt(h_non_zero))
m = Minuit(wrapped, **par_ests)
fmin = m.migrad()
xt = np.arange(0, 200)
yt = gaussian3(xt, m.values['mu0'], m.values['s0'], m.values['A0'],
m.values['mu1'], m.values['s1'], m.values['A1'],
m.values['mu2'], m.values['s2'], m.values['A2'])
d.append({
'x': xt,
'y': yt,
'label': '{}: uncorrected (fit)'.format(qm),
'color': 'red',
'linewidth': 2
})
row += [m.values['s1']*2.35, (m.values['mu1']-m.values['mu0'])/m.values['s1']]
row.append("{:0.2f} %".format(100*(row[3]/row[1]-1)))
y_range_max = max(y_range_max, np.max(h[c>25])*1.5)
# output table
table.append(row)
fig = xana.simplePlot(d, y_log=False, figsize="2col",
aspect=2,
x_range=(0, 200),
legend='top-right-frame',
y_range=(0, y_range_max),
x_label='Energy (ADU)',
y_label='Counts')
display(HTML(tabulate.tabulate(table, tablefmt='html', headers=headers,
numalign="right", floatfmt="0.2f")))
```
%% Cell type:code id: tags:
``` python
```
import numpy as np
from cal_tools.agipdutils_ff import get_mask, set_par_limits
def test_get_mask():
fit_summary = {
'chi2_ndof': 1.674524751845516,
'g0n': 6031.641198873036,
'error_g0n': 94.63055028459667,
'limit_g0n': np.array([0.0, None]),
'fix_g0n': False,
'g0mean': -13.711814669099589,
'error_g0mean': 0.2532017427306297,
'limit_g0mean': np.array([-30, 30]),
'fix_g0mean': False,
'g0sigma': 13.478502058651568,
'error_g0sigma': 0.2588135637661919,
'limit_g0sigma': np.array([0, 30]),
'fix_g0sigma': False,
'g1n': 4337.126861254491,
'error_g1n': 69.764180118274,
'limit_g1n': np.array([0, None]),
'fix_g1n': False,
'g1mean': 53.90265411499657,
'error_g1mean': 0.27585684670864746,
'limit_g1mean': None,
'fix_g1mean': False,
'g1sigma': 15.687448834904817,
'error_g1sigma': 0.2951166525483524,
'limit_g1sigma': np.array([0, 35]),
'fix_g1sigma': False,
'g2n': 1542.531700635003,
'error_g2n': 43.20145030604567,
'limit_g2n': np.array([0, None]),
'fix_g2n': False,
'g2mean': 120.98535387591575,
'error_g2mean': 0.509566354942716,
'limit_g2mean': None,
'fix_g2mean': False,
'g2sigma': 15.550452880533143,
'error_g2sigma': 0.5003254358001863,
'limit_g2sigma': np.array([0, 40]),
'fix_g2sigma': False,
'g3n': 1261189.2282171287,
'error_g3n': 1261190.2282163086,
'limit_g3n': np.array([0, None]),
'fix_g3n': False,
'g3mean': 348.68766379647343,
'error_g3mean': 17.23872285713375,
'limit_g3mean': None,
'fix_g3mean': False,
'g3sigma': 44.83987230934497,
'error_g3sigma': 30.956164693249242,
'limit_g3sigma': np.array([0, 45]),
'fix_g3sigma': False,
'fval': 336.5794751209487,
'edm': 0.00011660826330754263,
'tolerance': 0.1,
'nfcn': 4620,
'ncalls': 4620,
'up': 1.0,
'is_valid': True,
'has_valid_parameters': True,
'has_accurate_covar': True,
'has_posdef_covar': True,
'has_made_posdef_covar': False,
'hesse_failed': False,
'has_covariance': True,
'is_above_max_edm': False,
'has_reached_call_limit': False}
peak_lim = [-30, 30]
d0_lim = [10, 80]
chi2_lim = [0, 3.0]
peak_width_lim = np.array([[0.9, 1.55], [0.95, 1.65]])
mask = get_mask(fit_summary, peak_lim, d0_lim, chi2_lim, peak_width_lim)
assert mask == 0
def test_set_par_limits():
peak_range = np.array([[-30, 30],
[35, 70],
[95, 135],
[145, 220]])
peak_norm_range = np.array([[0.0, None],
[0, None],
[0, None],
[0, None]])
peak_width_range = np.array([[0, 30],
[0, 35],
[0, 40],
[0, 45]])
parameters = {
'g0sigma': 9.620186459204016,
'g0n': 5659.0,
'g0mean': -3.224686340342817,
'g1sigma': 8.149415371586683,
'g1n': 3612.0,
'g1mean': 54.6281838316722,
'g2sigma': 9.830124777667839,
'g2n': 1442.0,
'g2mean': 114.92510402219139,
'g3sigma': 15.336595220228498,
'g3n': 474.0,
'g3mean': 167.0295358649789}
expected = {
'g0sigma': 9.620186459204016,
'g0n': 5659.0,
'g0mean': -3.224686340342817,
'g1sigma': 8.149415371586683,
'g1n': 3612.0,
'g1mean': 54.6281838316722,
'g2sigma': 9.830124777667839,
'g2n': 1442.0,
'g2mean': 114.92510402219139,
'g3sigma': 15.336595220228498,
'g3n': 474.0,
'g3mean': 167.0295358649789,
'limit_g0n': np.array([0.0, None]),
'limit_g0mean': np.array([-30, 30]),
'limit_g0sigma': np.array([0, 30]),
'limit_g1n': np.array([0, None]),
'limit_g1mean': np.array([35, 70]),
'limit_g1sigma': np.array([0, 35]),
'limit_g2n': np.array([0, None]),
'limit_g2mean': np.array([95, 135]),
'limit_g2sigma': np.array([0, 40]),
'limit_g3n': np.array([0, None]),
'limit_g3mean': np.array([145, 220]),
'limit_g3sigma': np.array([0, 45])}
set_par_limits(parameters, peak_range, peak_norm_range, peak_width_range)
assert parameters.keys() == expected.keys()
for key in parameters.keys():
if isinstance(parameters[key], np.ndarray):
assert np.all(parameters[key] == expected[key])
else:
assert parameters[key] == expected[key]
...@@ -21,6 +21,8 @@ notebooks = {
    "FF": {
        "notebook":
            "notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb",
        "dep_notebooks": [
            "notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb"],
        "concurrency": {"parameter": "modules",
                        "default concurrency": 16,
                        "cluster cores": 16},
...