add missing h5_ctrl path for gainsetting

240dcf5c · Karim Ahmed · bd677d0e · 240dcf5c
Commit 240dcf5c authored 4 years ago by Karim Ahmed
--- a/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb
+++ b/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb
@@ -36,11 +36,11 @@
   },
   "outputs": [],
   "source": [
-    "in_folder = '/gpfs/exfel/exp/MID/201931/p900091/raw' # path to input data, required\n",
+    "in_folder = '/gpfs/exfel/exp/SPB/202030/p900138/raw/' # path to input data, required\n",
    "modules = [1,] # modules to work on, required, range allowed\n",
-    "out_folder = \"/gpfs/exfel/exp/MID/201931/p900091/usr/PC/4.5_250/\" # path to output to, required\n",
-    "runs = [30, 23, 24, 25, 26, 27, 28, 29] # runs to use, required, range allowed\n",
-    "n_sequences = 3 # number of sequence files, starting for 0 to evaluate\n",
+    "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/pc\" # path to output to, required\n",
+    "runs = [466, 467, 468, 469, 470, 471, 472, 473] # runs to use, required, range allowed\n",
+    "n_sequences = 1 # number of sequence files, starting for 0 to evaluate\n",
    "cluster_profile = \"noDB\" # The ipcluster profile to use\n",
    "local_output = True # output constants locally\n",
    "db_output = False # output constants to database\n",
@@ -50,13 +50,14 @@
    "interlaced = False # assume interlaced data format, for data prior to Dec. 2017\n",
    "fit_hook = True # fit a hook function to medium gain slope\n",
    "rawversion = 2 # RAW file format version\n",
-    "instrument = \"MID\"\n",
+    "instrument = \"SPB\"\n",
    "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h\n",
    "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n",
    "use_dir_creation_date = True\n",
    "creation_time = \"\" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00\n",
    "gain_setting = 0.1 # gain setting can have value 0 or 1, Default=0.1 for no (None) gain-setting\n",
-    "karabo_da_control = \"AGIPD1MCTRL00\" # karabo DA for control infromation"
+    "karabo_da_control = \"AGIPD1MCTRL00\" # karabo DA for control infromation\n",
+    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information"
   ]
  },
  {
@@ -124,9 +125,11 @@
    "if instrument == \"SPB\":\n",
    "    loc = \"SPB_DET_AGIPD1M-1\"\n",
    "    dinstance = \"AGIPD1M1\"\n",
+    "    karabo_id_control = \"SPB_IRU_AGIPD1M1\"\n",
    "else:\n",
    "    loc = \"MID_DET_AGIPD1M-1\"\n",
    "    dinstance = \"AGIPD1M2\"\n",
+    "    karabo_id_control = \"MID_EXP_AGIPD1M1\"\n",
    "print(\"Detector in use is {}\".format(loc))"
   ]
  },
@@ -206,6 +209,9 @@
   "source": [
    "control_fname = f'{in_folder}/r{runs[0]:04d}/RAW-R{runs[0]:04d}-{karabo_da_control}-S00000.h5'\n",
    "\n",
+    "if \"{\" in h5path_ctrl:\n",
+    "    h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n",
+    "\n",
    "if gain_setting == 0.1:\n",
    "    if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):\n",
    "        print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n",

 %% Cell type:markdown id: tags:

 # Characterize AGIPD Pulse Capacitor Data #

 Author: S. Hauf, Version 1.0

 The following code characterizes AGIPD gain via data taken with the pulse capacitor source (PCS). The PCS allows scanning through the high and medium gains of AGIPD by successively increasing the number of charge pulses from an on-ASIC capacitor, thus increasing the charge a pixel sees in a given integration time.

 Because induced charge does not originate from X-rays on the sensor, the gains evaluated here will later need to be rescaled with gains deduced from X-ray data.

 PCS data is organized into multiple runs, as the on-ASIC current source cannot supply all pixels of a given module with charge at the same time. Hence, only certain pixel rows will have seen charge for a given image. These rows then first need to be combined into single module images again.

 We then use a K-means clustering algorithm to identify components in the resulting per-pixel data series, matching to three general regions:

 * a high gain slope
 * a transition region, where gain switching occurs
 * a medium gain slope.

 The same regions are present in the gain-bit data and are used to deduce the switching threshold.

 The resulting slopes are then fitted with a linear function and a combination of a linear and exponential decay function to determine the relative gains of the pixels with respect to the module. Additionally, we deduce masks for bad pixels from the data.

 %% Cell type:code id: tags:

 ``` python
-in_folder = '/gpfs/exfel/exp/MID/201931/p900091/raw' # path to input data, required
+in_folder = '/gpfs/exfel/exp/SPB/202030/p900138/raw/' # path to input data, required
 modules = [1,] # modules to work on, required, range allowed
-out_folder = "/gpfs/exfel/exp/MID/201931/p900091/usr/PC/4.5_250/" # path to output to, required
-runs = [30, 23, 24, 25, 26, 27, 28, 29] # runs to use, required, range allowed
-n_sequences = 3 # number of sequence files, starting for 0 to evaluate
+out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/pc" # path to output to, required
+runs = [466, 467, 468, 469, 470, 471, 472, 473] # runs to use, required, range allowed
+n_sequences = 1 # number of sequence files, starting for 0 to evaluate
 cluster_profile = "noDB" # The ipcluster profile to use
 local_output = True # output constants locally
 db_output = False # output constants to database
 bias_voltage = 300 # detector bias voltage
 cal_db_interface = "tcp://max-exfl016:8019"  # the database interface to use
 mem_cells = 0.  # number of memory cells used, use 0 to auto-derive
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017
 fit_hook = True # fit a hook function to medium gain slope
 rawversion = 2 # RAW file format version
-instrument = "MID"
+instrument = "SPB"
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 use_dir_creation_date = True
 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00
 gain_setting = 0.1 # gain setting can have value 0 or 1, Default=0.1 for no (None) gain-setting
 karabo_da_control = "AGIPD1MCTRL00" # karabo DA for control infromation
+h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
 ```

 %% Cell type:code id: tags:

 ``` python
 # imports, usually no need to change anything here
 from datetime import datetime
 import dateutil.parser
 import h5py
 import os


 import numpy as np
 import matplotlib
 matplotlib.use("Qt4Agg")
 import matplotlib.pyplot as plt
 %matplotlib inline
 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 from ipyparallel import Client

 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from functools import partial
 import XFELDetAna.xfelpyanatools as xana
 import warnings
 warnings.filterwarnings('ignore')

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
 from cal_tools.tools import gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name, get_dir_creation_date
 from cal_tools.influx import InfluxLogger
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_overview, plot_badpix_3d
 from cal_tools.agipdlib import get_acq_rate, get_num_cells, get_gain_setting



 IL_MODE = interlaced
 maxcells = mem_cells if not interlaced else mem_cells*2
 cells = mem_cells
 path_temp = in_folder+"/r{:04d}/"
 image_name_temp = 'RAW-R{:04d}-AGIPD{:02d}-S{:05d}.h5'
 seqs = n_sequences
 print("Parameters are:")
 print("Memory cells: {}/{}".format(cells, maxcells))
 print("Runs: {}".format(runs))
 print("Modules: {}".format(modules))
 print("Sequences: {}".format(seqs))
 print("Interlaced mode: {}".format(IL_MODE))


 run, prop, seq = run_prop_seq_from_path(in_folder)
 logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
                      notebook=get_notebook_name(), proposal=prop)

 loc = None
 if instrument == "SPB":
    loc = "SPB_DET_AGIPD1M-1"
    dinstance = "AGIPD1M1"
+    karabo_id_control = "SPB_IRU_AGIPD1M1"
 else:
    loc = "MID_DET_AGIPD1M-1"
    dinstance = "AGIPD1M2"
+    karabo_id_control = "MID_EXP_AGIPD1M1"
 print("Detector in use is {}".format(loc))
 ```

 %% Cell type:markdown id: tags:

 ## Read in data and merge ##

 The number of bursts in each sequence file is determined from the sequence files of the first module.

 %% Cell type:code id: tags:

 ``` python
 # Inspect the sequence files of the first run (channel 0 only) to derive the
 # acquisition rate, the number of memory cells and the bursts per file.
 run = runs[0]
 bursts_per_file = []
 channel = 0

 # Define constant creation time.
 # This is resolved once, before looping over the sequence files: parsing it
 # inside the loop would pass the already-parsed datetime back to strptime on
 # the second sequence, fail, and silently discard the user-supplied value.
 if creation_time and not isinstance(creation_time, datetime):
    try:
        creation_time = datetime.strptime(creation_time, '%Y-%m-%d %H:%M:%S.%f')
    except (TypeError, ValueError) as e:
        print(f"creation_time value error: {e}. "
              "Use same format as YYYY-MM-DD HR:MN:SC.ms e.g. 2019-07-04 11:02:41.00\n")
        creation_time = None
        print("Given creation time wont be used.")
 elif not creation_time:
    creation_time = None

 # Fall back to the creation date of the run directory when requested.
 if not creation_time and use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)

 for seq in range(seqs):
    fname = os.path.join(path_temp.format(run),
                         image_name_temp.format(run, channel, seq))
    print('Reading ',fname)

    # 0 means "auto-determine"; after the first file the value is non-zero,
    # so the file is only queried once.
    if acq_rate == 0.:
        acq_rate = get_acq_rate(fname, loc, channel)
        print("Acquisition rate set from file: {} MHz".format(acq_rate))

    if mem_cells == 0:
        cells = get_num_cells(fname, loc, channel)
        maxcells = cells
        mem_cells = cells  # avoid setting twice
        print("Memory cells set from file: {}".format(cells))

    # The context manager closes the file even if a dataset is missing.
    with h5py.File(fname, 'r', driver='core') as f:
        if rawversion == 2:
            count = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
            bursts_per_file.append(np.count_nonzero(count))
        else:
            status = np.squeeze(f["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(loc, channel)])
            bursts_per_file.append(np.count_nonzero(status != 0))
 bursts_per_file = np.array(bursts_per_file)
 print("Bursts per sequence file are: {}".format(bursts_per_file))
 if creation_time:
    print("Using {} as creation time".format(creation_time.isoformat()))
 ```

 %% Cell type:code id: tags:

 ``` python
 control_fname = f'{in_folder}/r{runs[0]:04d}/RAW-R{runs[0]:04d}-{karabo_da_control}-S00000.h5'

+if "{" in h5path_ctrl:
+    h5path_ctrl = h5path_ctrl.format(karabo_id_control)
+
 if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'Error while reading gain setting from: \n{control_fname}')
            print(e)
            print("Gain setting is not found in the control information")
            print("Data will not be processed")
            sequences = []
 print(f"Gain setting: {gain_setting}")
 ```

 %% Cell type:code id: tags:

 ``` python

 def read_and_merge_module_data(cells, path_temp, image_name_temp,
                               runs, seqs, il_mode, rawversion, instrument, channel):
    """Read the raw files of one module for all runs and merge them.

    For every run in `runs`, `seqs` sequence files of module `channel` are
    read into per-burst analog/digital arrays. Selected pixel rows of each
    run (chosen from the run's position in `runs`) are then copied into the
    merged output arrays.

    Parameters:
        cells: number of memory cells; entries whose cellId is >= `cells`
            are dropped when reading a file.
        path_temp: format string for the run directory, formatted with the
            run number.
        image_name_temp: format string for the file name, formatted with
            (run, channel, seq).
        runs: iterable of run numbers; the index of a run in this list
            selects which rows are copied from it.
        seqs: number of sequence files to read per run.
        il_mode: if truthy, a step of 2 (`cfac`) is used when slicing the
            raw data and the cell axis of the outputs is `cells//2`.
        rawversion: 2 selects the `count`/`first` index datasets, any other
            value the `status`/`last` datasets.
        instrument: used to build the `{instrument}_DET_AGIPD1M-1` source
            name inside the HDF5 files.
        channel: module number used for the data files.

    Returns:
        Tuple `(analog, digital, cellID)` taken from the merged arrays;
        analog and digital have shape (bursts_total, cells//cfac, 128, 512),
        cellID has length bursts_total * cells.
    """
    # Imports are local to the function.
    # NOTE(review): presumably so the function is self-contained when sent
    # to remote engines -- confirm.
    import h5py
    import numpy as np
    import os


    def cal_bursts_per_file(run, dseq=0):
        # Returns (array of non-empty train counts per sequence file, dseq),
        # where dseq is the index of the first sequence file actually used.

        bursts_per_file = []
        # Always counts on channel 0; this local shadows the outer `channel`.
        channel = 0

        for seq in range(dseq, seqs+dseq):
            #print(run, channel, seq)
            fname = os.path.join(path_temp.format(run),
                                 image_name_temp.format(run, channel, seq))
            #print('Reading ',fname)
            with h5py.File(fname, 'r') as f:
                if rawversion == 2:
                    count = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)][()])
                    bursts_per_file.append(np.count_nonzero(count))
                    del count
                else:
                    status = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)][()])
                    bursts_per_file.append(np.count_nonzero(status != 0))
                    del status
        # If the first file holds no bursts, retry with the window of
        # sequence files shifted by one.
        if bursts_per_file[0] == 0:
            return cal_bursts_per_file(run, dseq=dseq+1)  # late start of daq
        return np.array(bursts_per_file), dseq

    #bursts_per_file = np.hstack([0, bursts_per_file])

    # Size the output arrays for the run with the most bursts.
    bursts_total = np.max([np.sum(cal_bursts_per_file(run)[0]) for run in runs])

    # Slice step / cell-axis divisor: 2 in interlaced mode, else 1.
    cfac = 2 if il_mode else 1

    def read_raw_data_file(fname, channel, cells = cells, cells_tot = cells, bursts = 250,
                           skip_first_burst = True, first_burst_length = cells):
        # Reads one sequence file and returns a dict with 'analog', 'digital'
        # and 'cellID' arrays, or None when (rawversion != 2) no entry has a
        # non-zero status. Defaults are bound to the outer `cells` at
        # definition time.
        data = None
        cellID_all = None
        with h5py.File(fname, 'r') as f:

            #print('Reading ',fname)
            image_path_temp = 'INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data'.format(instrument, channel)
            cellID_path_temp = 'INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId'.format(instrument, channel)
            if rawversion == 2:
                count = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count".format(instrument, channel)])
                first = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first".format(instrument, channel)])
                # Range spanned by the entries with a non-zero count.
                last_index = int(first[count != 0][-1]+count[count != 0][-1])
                first_index = int(first[count != 0][0])
            else:
                status = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status".format(instrument, channel)])
                if np.count_nonzero(status != 0) == 0:
                    return
                last = np.squeeze(f["/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last".format(instrument, channel)])
                last_index = int(last[status != 0][-1])
                # NOTE(review): first_index is taken from the `last` dataset
                # here, not from a `first` dataset -- confirm this is intended.
                first_index = int(last[status != 0][0])
            #print(first_index, last_index)
            data = f[image_path_temp][first_index:last_index,...][()]

            cellID_all = np.squeeze(f[cellID_path_temp][first_index:last_index,...][()])
            # Keep only entries belonging to the first `cells` memory cells.
            # cellID_all itself is not filtered.
            data = data[cellID_all<cells, ...]

        #bursts = int(data.shape[0]/adcells)
        #print('Bursts: ', bursts)
        # skip_first_burst is a bool used as 0/1 in the arithmetic below.
        analog = np.zeros((bursts - skip_first_burst, cells//cfac, 128, 512))
        digital = np.zeros((bursts - skip_first_burst, cells//cfac, 128, 512))
        cellID = np.zeros(( (bursts - skip_first_burst) * cells))
        # Number of leading entries to skip when the first burst is dropped.
        offset = skip_first_burst * first_burst_length

        for b in range(min(bursts, data.shape[0]//cells-1)  - skip_first_burst-1):
            try:

                # Second axis index 0 is read as analog; the last two axes
                # are swapped to give (128, 512)-shaped images.
                analog[b, : cells//cfac, ...] = np.swapaxes(data[b * cells_tot + offset : b * cells_tot  + cells + offset : cfac,
                                                         0, ...], -1, -2)
                # Digital is read from second-axis index cfac%2 (1 when
                # cfac == 1, 0 when cfac == 2), shifted by cfac - 1 entries.
                digital[b, : cells//cfac, ...] = np.swapaxes(data[b * cells_tot + cfac - 1 + skip_first_burst * first_burst_length :
                                                          b * cells_tot  + cells + cfac - 1 + offset :cfac, cfac%2, ...], -1, -2)

                cellID[ b * cells : (b  + 1) * cells] = cellID_all[b * cells_tot + offset : b * cells_tot + cells + offset].flatten()
            except:
                # NOTE(review): a bare except replaces any failure (including
                # KeyboardInterrupt) with an uninformative AttributeError.
                #print(b * cells_tot + offset, b * cells_tot  + cells + offset)
                #print(b, offset, cells, data.shape[0]//cells)
                raise AttributeError("Foo")
        return {'analog': analog, 'digital': digital, 'cellID': cellID}


    # Per-run scratch buffers; allocated once and re-filled for every run.
    pc_data = {'analog': np.zeros((bursts_total, cells//cfac, 128, 512)),
               'digital': np.zeros((bursts_total, cells//cfac, 128, 512)),
               'cellID': np.zeros(((bursts_total) * cells))
              }
    # Output buffers receiving the selected rows of every run.
    pc_data_merged = {'analog': np.zeros((bursts_total, cells//cfac, 128, 512)),
               'digital': np.zeros((bursts_total, cells//cfac, 128, 512)),
               'cellID': np.zeros(((bursts_total) * cells))
              }

    for run_idx, run in enumerate(runs):
        bursts_per_file, dseq = cal_bursts_per_file(run)
        print("Run {}: bursts per file: {} -> {} total".format(run, bursts_per_file, np.sum(bursts_per_file)))
        #Read files in
        last_burst = 0
        for seq in range(dseq, seqs+dseq):
            fname = os.path.join(path_temp.format(run),
                                 image_name_temp.format(run, channel, seq))
            # Only the first burst of the first sequence file is skipped.
            if seq-dseq == 0:
                skip_first_burst = True
            else:
                skip_first_burst = False
            bursts = bursts_per_file[seq-dseq]

            try:
                aa = read_raw_data_file(fname, channel, bursts = bursts,
                                        skip_first_burst = skip_first_burst,
                                        first_burst_length = cells)
                pc_data['analog'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = aa['analog']
                pc_data['digital'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = aa['digital']
                pc_data['cellID'][last_burst * cells : (last_burst+bursts_per_file[seq-dseq]-skip_first_burst) * cells, ...] = aa['cellID']

            except Exception as e:
                # Any read failure (including a None return from
                # read_raw_data_file) is printed and the slot zero-filled.
                print(e)
                pc_data['analog'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = 0
                pc_data['digital'][last_burst : last_burst+bursts_per_file[seq-dseq]-skip_first_burst, ...] = 0
                pc_data['cellID'][last_burst * cells : (last_burst+bursts_per_file[seq-dseq]-skip_first_burst) * cells, ...] = 0
            finally:
                # Advance the write position whether or not the read worked.
                last_burst += bursts_per_file[seq-dseq]-skip_first_burst
        # Copy injected rows
        # For run index k, rows row_i*8 + (7 - k) and 64 + row_i*8 + k
        # (row_i = 0..7) are taken from this run.
        for row_i in range(8):
            try:
                pc_data_merged['analog'][:,:,row_i * 8 + (7 - run_idx),:] = pc_data['analog'][:bursts_total,:cells//cfac,row_i * 8 + (7 - run_idx),:]
                pc_data_merged['analog'][:,:,64 + row_i * 8 + run_idx ,:] = pc_data['analog'][:bursts_total,:cells//cfac, 64 + row_i * 8 + run_idx,:]
                pc_data_merged['digital'][:,:,row_i * 8 + (7 - run_idx),:] = pc_data['digital'][:bursts_total,:cells//cfac,row_i * 8 + (7 - run_idx),:]
                pc_data_merged['digital'][:,:,64 + row_i * 8 + run_idx ,:] = pc_data['digital'][:bursts_total,:cells//cfac, 64 + row_i * 8 + run_idx,:]
            except:
                # NOTE(review): failures while copying rows are silently
                # ignored, leaving the affected rows at their previous values.
                pass
        #Check cellIDs
        #Copy cellIDs of first run
        if run_idx == 0:
            pc_data_merged['cellID'][...] = pc_data['cellID'][...]
        #Check cellIDs of all the other runs
        #else:
        #    print('cellID difference:{}'.format(np.sum(pc_data_merged['cellID']-pc_data['cellID'])))
    return pc_data_merged['analog'], pc_data_merged['digital'], pc_data_merged['cellID']

 # Time the read-and-merge step so the duration can be reported to the logger.
 start = datetime.now()
 # Bind every argument except the trailing `channel`; the module numbers in
 # `modules` are supplied as that last argument by map() below.
 p = partial(read_and_merge_module_data, maxcells, path_temp, image_name_temp,
            runs, seqs, IL_MODE, rawversion, instrument)
 # chunk this a bit, so that we don't overuse available memory
 # The builtin map is used here, so modules are processed one after another
 # in this process; `res` holds one (analog, digital, cellID) tuple per module.
 res = list(map(p, modules))
 duration = (datetime.now()-start).total_seconds()
 logger.runtime_summary_entry(success=True, runtime=duration)
 logger.send()
 ```

 %% Cell type:markdown id: tags:

 ## Slope clustering and Fitting ##

 The following two cells contain the actual algorithm logic as well as a preview of a single pixel and memory cells visualizing the data and the concepts.

 We start out with calculating an estimate of the slope in proximity of a given data value. This is done by calculating the slopes of a given value with 15 neighbours and averaging the result. Values are then clustered by these slopes into three regions via a K-means algorithm.

 * for the first region a linear function is fitted to the data, determining the gain slope and offset for the high gain mode.

   $$y = mx + b$$

 * for the second and third region a composite function of the form:

  $$y = A*e^{-(x-O)/C}+mx+b$$

  is fitted, covering both the transition region and the medium gain slope.

 %% Cell type:code id: tags:

 ``` python
 from sklearn.cluster import KMeans
 from iminuit import Minuit
 from iminuit.util import make_func_code, describe

 def calc_m_cluster(x, y):
    """Cluster the points of a scan into three groups by local slope.

    For each point the slope towards each of the 15 preceding points is
    computed and averaged; `np.roll` wraps around, so the first points are
    compared against the end of the series. The averaged slopes are split
    into three clusters with K-means, and a mean pairwise slope is computed
    for every cluster.

    Parameters:
        x: 1-D array of scan positions.
        y: 1-D array of measured values, same length as `x`.

    Returns:
        Tuple `(ms, labels, centers)`: list with the mean pairwise slope of
        each cluster (ordered by cluster label), the K-means label of each
        point, and the K-means cluster centres.
    """
    scan_range = 15
    ms = np.zeros((x.shape[0], scan_range))
    for i in range(scan_range):
        xdiffs = x - np.roll(x, i+1)
        ydiffs = y - np.roll(y, i+1)
        ms[:, i] = ydiffs/xdiffs
    m = np.mean(ms, axis=1)

    # The `n_jobs` keyword was deprecated in scikit-learn 0.23 and removed in
    # 1.0; passing it raises a TypeError on current releases, so it is left
    # out. The clustering itself does not depend on it.
    k = KMeans(n_clusters=3)
    k.fit(m.reshape(-1, 1))
    ms = []
    for lbl in np.unique(k.labels_):
        in_cluster = k.labels_ == lbl

        # Pairwise differences between all points of the cluster; the zero
        # diagonal produces NaN slopes, which nanmean skips.
        xl = x[in_cluster]
        xd = np.reshape(xl, (len(xl), 1))
        xdiff = xd - xd.transpose()

        yl = y[in_cluster]
        yd = np.reshape(yl, (len(yl), 1))
        ydiff = yd - yd.transpose()
        ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))
    return ms, k.labels_, k.cluster_centers_

 def rolling_window(a, window):
    """Return all length-`window` windows along the last axis of `a`.

    The result is a strided view (no copy) whose last axis has length
    `window` and whose second-to-last axis enumerates the
    `a.shape[-1] - window + 1` window start positions.
    """
    last_stride = a.strides[-1]
    n_windows = a.shape[-1] - window + 1
    view_shape = a.shape[:-1] + (n_windows, window)
    # Stepping to the next window and to the next element inside a window
    # both advance by one element of the last axis.
    view_strides = a.strides + (last_stride,)
    return np.lib.stride_tricks.as_strided(a, shape=view_shape,
                                           strides=view_strides)


 def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
    """Split a scan into four regions using fixed slope thresholds.

    The local slope of `y` over `x` is estimated per point, smoothed, and
    compared against the thresholds `r1`, `r2` and `r3`. The resulting
    region indices are smoothed again and converted into boolean masks.

    Parameters:
        x: 1-D array of scan positions.
        y: 1-D array of measured values, same length as `x`.
        r1: points with slope > r1 form region 0.
        r2: points with slope < r2 form region 1.
        r3: points with r2 < slope < r3 form region 2; everything else is
            region 3.

    Returns:
        Tuple `(ms, labels, None)`: list with the mean pairwise slope of each
        region, and the list of four boolean masks over `x`.
    """
    scan_range = 15
    ms = np.zeros((x.shape[0], scan_range))
    # Slope towards each of the `scan_range` preceding points; np.roll wraps
    # around at the start of the series.
    for i in range(scan_range):
        xdiffs = x - np.roll(x, i+1)
        ydiffs = y - np.roll(y, i+1)
        m = ydiffs/xdiffs
        ms[:,i] = m
    m = np.mean(ms, axis=1)
    # `mm` is not used again in this function.
    mm = np.zeros_like(m)
    mm[...] = np.nan
    # Replace the interior of `m` by its rolling mean; the first and last
    # scan_range//2 points keep their unsmoothed values.
    m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
    reg1 = m > r1
    reg2 = m < r2
    reg3 = (m > r2) & (m < r3)
    reg4 = ~(reg1 | reg2 | reg3)
    labels = [reg1, reg2, reg3, reg4]
    regions = np.zeros_like(x, np.uint8)
    # Later masks overwrite earlier ones where they overlap.
    for r, lbl in enumerate(labels):
        regions[lbl] = r
    scan_range = 30
    # Smooth the region index with a rolling mean, rounded to the nearest
    # index.
    mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
    # NOTE(review): `regions` has dtype uint8 and cannot hold NaN; depending
    # on the numpy version this assignment may raise or store an arbitrary
    # integer for the edge points -- confirm the intended edge value.
    regions[...] = np.nan
    regions[scan_range//2:-scan_range//2+1] = mregions


    labels = [regions == 0, regions == 1, regions == 2, regions == 3]

    idx = np.arange(x.size)
    maxlbl = x.size-1
    # Trim each region so that it does not extend beyond the last point that
    # belongs to the following region.
    for i in range(0, len(labels)-1):
        nidx = labels[i+1]
        if np.any(nidx):
            maxlbl = np.max(idx[nidx])
            cidx = idx > maxlbl
            if np.any(cidx):
                labels[i][cidx] = False

    ms = []
    for lbl in labels:
        # Mean of all pairwise slopes within the region; the zero diagonal
        # gives NaN, which nanmean skips.
        xl = x[lbl]
        xd = np.reshape(xl, (len(xl), 1))
        xdiff = xd - xd.transpose()

        yl = y[lbl]
        yd = np.reshape(yl, (len(yl), 1))
        ydiff = yd - yd.transpose()
        ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))

    return ms, labels, None

 def fit_data(fun, x, y, yerr, par_ests):
    """Fit `fun` to the data with Minuit and return the fitted values.

    Parameters:
        fun: model called as `fun(x, *params)`; its first argument is the
            independent variable.
        x, y: data arrays; points where `y == 0` are excluded from the fit.
        yerr: per-point values the squared residuals are divided by.
        par_ests: dict of keyword arguments passed to `Minuit` (start
            values, limits, ...). It is modified in place: the keys
            "throw_nan", "pedantic" and "print_level" are set.

    Returns:
        `m.values` of the Minuit object after `migrad()`.
    """
    par_ests["throw_nan"] = False
    par_ests["pedantic"] = False
    par_ests["print_level"] = 0

    # Not used below; the functor derives the signature itself.
    f_sig = describe(fun)[1:]

    class _Chi2Functor:
        # Callable cost function exposing the parameter names of `f`.
        def __init__(self, f, x, y, err):
            self.f = f
            self.x = x[y != 0]
            self.y = y[y != 0]
            self.err = err[y != 0]
            f_sig = describe(f)
            # this is how you fake function
            # signature dynamically
            self.func_code = make_func_code(
                f_sig[1:])  # docking off independent variable
            self.func_defaults = None  # this keeps numpy.vectorize happy

        def __call__(self, *arg):
            # notice that it accept variable length
            # positional arguments
            # chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
            # NOTE(review): the squared residual is divided by err, not by
            # err**2 -- confirm this weighting is intended.
            return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)

    wrapped = _Chi2Functor(fun, x, y, yerr)
    m = Minuit(wrapped, **par_ests)
    # The return value of migrad() is not inspected.
    fmin = m.migrad()

    return m.values

 def lin_fun(x, m, b):
    """Evaluate the straight line with slope `m` and intercept `b` at `x`."""
    slope_term = m * x
    return slope_term + b

 def hook_fun(x, a, c, o, m, b):
    """Evaluate an exponential decay on top of a straight line at `x`.

    The decay has amplitude `a`, decay constant `c` and origin `o`; the
    line has slope `m` and intercept `b`.
    """
    decay = a * np.exp(-(x - o) / c)
    line = m * x
    return decay + line + b
 ```

 %% Cell type:code id: tags:

 ``` python
 # Retrieve the Offset, Noise and ThresholdsDark constants for every module
 # that was read. The dictionaries are keyed by the position of the module in
 # `res` (the enumeration index), not by the module number itself.
 from cal_tools.tools import get_constant_from_db_and_time
 offsets = {}
 noises = {}
 thresholds = {}
 for mod, r in enumerate(res):
    ii = modules[mod]
    # Quadrant/module name built from the module number: four modules per
    # quadrant, both counted from 1.
    qm = "Q{}M{}".format(ii//4+1, ii%4+1)
    det = getattr(Detectors, dinstance)
    # All three requests use the same dark conditions and creation time.
    # NOTE(review): the zero array passed after the conditions is presumably
    # the fallback/empty constant -- confirm against cal_tools.
    offset, when = get_constant_from_db_and_time(getattr(det, qm),
                                                     Constants.AGIPD.Offset(),
                                                     Conditions.Dark.AGIPD(
                                                         memory_cells=mem_cells,
                                                         bias_voltage=bias_voltage, acquisition_rate=acq_rate,
                                                         gain_setting=gain_setting),
                                                         np.zeros((128, 512, mem_cells, 3)),
                                                     cal_db_interface, creation_time=creation_time)
    print("Offset for {} was injected on {}".format(qm, when))
    offsets[mod] = np.array(offset.data)

    noise, when = get_constant_from_db_and_time(getattr(det, qm),
                                                     Constants.AGIPD.Noise(),
                                                     Conditions.Dark.AGIPD(
                                                         memory_cells=mem_cells,
                                                         bias_voltage=bias_voltage, acquisition_rate=acq_rate,
                                                         gain_setting=gain_setting),
                                                         np.zeros((128, 512, mem_cells, 3)),
                                                     cal_db_interface, creation_time=creation_time)
    print("Noise for {} was injected on {}".format(qm, when))
    noises[mod] = np.array(noise.data)

    threshold, when = get_constant_from_db_and_time(getattr(det, qm),
                                                     Constants.AGIPD.ThresholdsDark(),
                                                     Conditions.Dark.AGIPD(
                                                         memory_cells=mem_cells,
                                                         bias_voltage=bias_voltage, acquisition_rate=acq_rate,
                                                         gain_setting=gain_setting),
                                                         np.zeros((128, 512, mem_cells, 3)),
                                                     cal_db_interface, creation_time=creation_time)
    print("Threshold for {} was injected on {}".format(qm, when))
    thresholds[mod] = np.array(threshold.data)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Overview plot: for a block of test pixels and a few memory cells, cluster
 # each scan into regions and accumulate 2-D histograms (scan point vs.
 # offset-subtracted signal) per region.
 test_pixels = []
 tpix_range1 = [(0,16), (0,64)]
 for i in range(*tpix_range1[0]):
    for j in range(*tpix_range1[1]):
        test_pixels.append((j,i))
 test_cells = [4, 38, 64, 128]#, 200, 249]
 # Keep only cells that exist; fall back to the last cell if none does.
 tcell = np.array(test_cells)
 tcell = tcell[tcell < mem_cells]
 if tcell.size == 0:
    test_cells = [mem_cells-1]
 else:
    test_cells = tcell.tolist()

 from mpl_toolkits.axes_grid1 import ImageGrid
 for mod, r in enumerate(res):
    # NOTE(review): read_and_merge_module_data returns
    # (analog, digital, cellID); the names below are bound in the opposite
    # order -- confirm this is intended.
    dig, ana, cellId = r
    d = []
    d2 = []
    d3 = []

    # One accumulated histogram per region returned by calc_m_cluster2.
    H = [0, 0, 0, 0]

    ex, ey = None, None
    offset = offsets[mod]
    for pix in test_pixels:
        for cell in test_cells:
            color = np.random.rand(3,1)

            x = np.arange(dig.shape[0])
            y = dig[:,cell, pix[0], pix[1]]

            # Ignore scan points without a usable signal.
            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]

            ms, labels, centers = calc_m_cluster2(x, y)
            bound = None
            bound_m = None
            markers = ['o','.','x','v']
            colors = ['b', 'r', 'g', 'k']
            ymin = y.min()


            for i, lbl in enumerate(labels):
                if np.any(lbl):
                    #ym = y[lbl]-y[lbl].min()
                    # Region 0 is corrected with the offset at gain index 0,
                    # all other regions with the offset at gain index 1.
                    if i == 0:
                        gain = 0
                    else:
                        gain = 1
                    ym = y[lbl] - offset[pix[0], pix[1], cell, gain]
                    #if i != 0:
                    #    ym += y[labels[0]].max()-y[labels[0]].min()
                    h, ex, ey = np.histogram2d(x[lbl], ym, range=((0, 600), (-500, 6000)), bins=(300, 650))
                    H[i] += h



    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    # Empty bins are set to NaN so they are not drawn.
    for i in range(3):
        H[i][H[i]==0] = np.nan
    # imshow only accepts origin='upper' or 'lower'; the former "bottom" is
    # rejected by current matplotlib, so 'lower' is used to put the first
    # row at the bottom of the axes.
    ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
    ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
    ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
    ax.set_ylabel("AGIPD response (ADU)")
    ax.set_xlabel("PC scan point (#)")
 ```

 %% Cell type:markdown id: tags:

 ### Examples from Pixel Subset ###

 The following is a visualization of the clustering and fitting for a subset of pixels. If data significantly mismatches expectations, the clustering and fitting algorithms should fail for this subset:

 * the first plot shows the clustering results for pixels which were successfully evaluated
 * the second plot shows the clustering results for pixels which failed to evaluate
 * the third plot shows the fits and fit residuals for the pixel clusters shown in the first plot

 Non-smooth behaviour is an indication that you are erroneously processing non-interleaved data as interleaved, or vice versa, or have the wrong number of memory cells set.

 We do this twice for different detector regions

 %% Cell type:code id: tags:

 ``` python
 # Diagnostic plots for a small block of test pixels (first pixel range).
 # For every module, the selected pixels/cells are clustered, fitted with a
 # linear (and optionally hook) function, and three figures are written to
 # out_folder.
 # NOTE: tpix_range1 and test_cells are module-level names that later cells
 # of this notebook read again.
 # NOTE(review): calc_m_cluster2, fit_data, lin_fun and hook_fun are not defined
 # in this cell; presumably they are notebook-level versions of the helpers
 # nested in calibrate_single_row -- confirm.
 test_pixels = []
 tpix_range1 = [(250,254), (60,64)]
 for i in range(*tpix_range1[0]):
    for j in range(*tpix_range1[1]):
        # stored as (index from range [1], index from range [0])
        test_pixels.append((j,i))
 test_cells = [4, 38]
 # keep only the requested cells that exist; fall back to the last cell
 tcell = np.array(test_cells)
 tcell = tcell[tcell < mem_cells]
 if tcell.size == 0:
    test_cells = [mem_cells-1]
 else:
    test_cells = tcell.tolist()

 for mod, r in enumerate(res):
    dig, ana, cellId = r
    # d: offset-corrected clustered signal, d2: `ana` signal with threshold
    # line, d3: fit curves with residuals
    d = []
    d2 = []
    d3 = []
    offset = offsets[mod]
    noise = noises[mod]
    for pix in test_pixels:
        for cell in test_cells:
            color = np.random.rand(3,1)

            x = np.arange(dig.shape[0])
            y = dig[:,cell, pix[0], pix[1]]

            # only scan points above 1000 and finite are used
            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]

            ms, labels, centers = calc_m_cluster2(x, y)
            bound = None
            bound_m = None
            markers = ['o','.','x','v']
            colors = ['b', 'r', 'g', 'k']
            for i, lbl in enumerate(labels):
                # the first cluster is corrected with offset index 0, all
                # others with offset index 1
                if i == 0:
                    gain = 0
                else:
                    gain = 1
                d.append({'x': x[lbl],
                  'y': y[lbl] - offset[pix[0], pix[1], cell, gain],
                  'marker': markers[i],
                  'color': colors[i],
                  'linewidth': 0
                 })
                #if ms[i] < 0: # slope separating two regions
                #    bound = np.min(x[lbl])
                #    bound_m = ms[i]
            # the first scan point of cluster 1 separates the two fit ranges
            bound = np.min(x[labels[1]])
            bound_m = ms[1]
            # `and` binds tighter than `or`, so `bound < 20 and False` is always
            # False; this branch can only run when bound is None, which the
            # assignment above rules out.
            if bound is None or bound < 20 and False:
                ya = ana[:,cell, pix[0], pix[1]][vidx]
                msa, labels, centers = calc_m_cluster2(x, ya, 25, -10, 25)
                if np.count_nonzero(labels[0]) > 0:
                    bound = np.min(x[labels[0]])
                    bound_m = ms[3]
                else:
                    avg_g = np.nanmean(ya)
                    bound = np.max(x[y < avg_g])
                    bound_m = ms[3]

            #print(bound)
            # fit linear slope
            xl = x[(x<bound)]
            yl = y[(x<bound)] - offset[pix[0], pix[1], cell, 0]
            parms = {'m': bound_m, 'b': np.min(yl)}

            errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
            fitted = fit_data(lin_fun, xl, yl, errors , parms)
            yf = lin_fun(xl, fitted['m'], fitted['b'])
            max_devl = np.max(np.abs((yl-yf)/yl))

            d3.append({'x': xl,
                      'y': yf,
                      'color': 'k',
                      'linewidth': 1,
                       'y2': (yf-yl)/errors
                     })

            # fit hook slope
            if fit_hook:
                idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
                xh = x[idx]
                yh = y[idx] - offset[pix[0], pix[1], cell, 1]
                # start value for m is bound_m/10, replaced by 0.5 when that is not above 0.3
                parms = {'m': bound_m/10 if bound_m/10>0.3 else 0.5, 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5, 'o': bound-1}
                parms["limit_m"] = [0.3, 1.0]
                parms["limit_c"] = [1., 1000]
                errors = np.ones(xh.shape)*noise[pix[0], pix[1], cell, 1]
                fitted = fit_data(hook_fun, xh, yh, errors, parms)
                yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])

                max_devh = np.max(np.abs((yh-yf)/yh))
                #print(fitted)
                d3.append({'x': xh,
                          'y': yf,
                          'color': 'red',
                          'linewidth': 1,
                          'y2': (yf-yh)/errors
                         })

            # from here on x/y refer to the `ana` array instead of `dig`
            x = np.arange(ana.shape[0])
            y = ana[:,cell, pix[0], pix[1]]

            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]

            #ms, labels, centers = calc_m_cluster2(x, y, 25, -10, 25)
            # NOTE(review): `labels` still come from the `dig` clustering and are
            # applied to `ana` values filtered with their own mask; this assumes
            # both selections have the same length -- confirm.
            threshold = (np.mean(y[labels[0]])+np.mean(y[labels[2]]))/2

            for i, lbl in enumerate(labels):

                d2.append({'x': x[lbl],
                  'y': y[lbl],
                  'marker': markers[i],
                  'color': colors[i],
                  'lw': None

                 })

                # horizontal threshold line; appended once per label
                d2.append({'x': np.array([x[0], x[-1]]),
                  'y': np.ones(2)*threshold,

                  'color': 'k',
                  'lw': 1

                 })

            #threshold = (np.min(y[x<bound]) + np.max(y[x>=bound]))/2


    # one set of three figures per module
    fig = xana.simplePlot(d, y_label="PC pixel signal (ADU)", figsize='2col', aspect=2,
                         x_label="step #")
    fig.savefig("{}/module_{}_pixel_plot.png".format(out_folder, modules[mod]))

    fig = xana.simplePlot(d2, y_label="PC gain signal (ADU)", figsize='2col', aspect=2,
                         x_label="step #")
    fig.savefig("{}/module_{}_pixel_plot_gain.png".format(out_folder, modules[mod]))

    fig = xana.simplePlot(d3, secondpanel=True, y_label="PC signal (ADU)", figsize='2col', aspect=2,
                         x_label="step #", y2_label="Residuals ($\sigma$)", y2_range=(-5,5))
    fig.savefig("{}/module_{}_pixel_plot_fits.png".format(out_folder, modules[mod]))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Diagnostic plots for the second test-pixel range. The per-pixel processing
 # is the same as for the first range, but fit curves and `ana` traces are only
 # added to the plots when the gain threshold is above 10000 or below 4000 (or,
 # for the hook curve, when the fitted m is below 0.2). Files get a "_fail"
 # suffix.
 # NOTE: tpix_range2 is a module-level name read again by the overview-plot
 # cell; test_cells is reused from the previous cell.
 test_pixels = []
 tpix_range2 = [(96,128), (32,64)]
 for i in range(*tpix_range2[0]):
    for j in range(*tpix_range2[1]):
        test_pixels.append((j,i))


 for mod, r in enumerate(res):
    dig, ana, cellId = r
    d = []
    d2 = []
    d3 = []
    offset = offsets[mod]
    noise = noises[mod]
    for pix in test_pixels:
        for cell in test_cells:
            color = np.random.rand(3,1)

            x = np.arange(dig.shape[0])
            y = dig[:,cell, pix[0], pix[1]]

            # only scan points above 1000 and finite are used
            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]

            ms, labels, centers = calc_m_cluster2(x, y)
            bound = None
            bound_m = None
            markers = ['o','.','x','v']
            colors = ['b', 'r', 'g', 'k']
            for i, lbl in enumerate(labels):
                # the first cluster is corrected with offset index 0, all
                # others with offset index 1
                if i == 0:
                    gain = 0
                else:
                    gain = 1
                d.append({'x': x[lbl],
                  'y': y[lbl] - offset[pix[0], pix[1], cell, gain],
                  'marker': markers[i],
                  'color': colors[i],
                  'linewidth': 0
                 })
                #if ms[i] < 0: # slope separating two regions
                #    bound = np.min(x[lbl])
                #    bound_m = ms[i]
            # the first scan point of cluster 1 separates the two fit ranges
            bound = np.min(x[labels[1]])
            bound_m = ms[1]

            # fit linear slope
            xl = x[(x<bound)]
            yl = y[(x<bound)] - offset[pix[0], pix[1], cell, 0]
            errors = np.ones(xl.shape)*noise[pix[0], pix[1], cell, 0]
            parms = {'m': bound_m, 'b': np.min(yl)}
            fitted = fit_data(lin_fun, xl, yl, errors, parms)

            yf = lin_fun(xl, fitted['m'], fitted['b'])
            max_devl = np.max(np.abs((yl-yf)/yl))

            # `ana` trace of the same pixel/cell, filtered with its own mask
            xtt = np.arange(ana.shape[0])
            ytt = ana[:,cell, pix[0], pix[1]]

            vidx = (ytt > 1000) & np.isfinite(ytt)
            xtt = xtt[vidx]
            ytt = ytt[vidx]

            #ms, labels, centers = calc_m_cluster2(x, y, 25, -10, 25)
            # NOTE(review): `labels` come from the `dig` clustering; applying them
            # to ytt assumes both masks select the same number of points -- confirm.
            threshold = (np.mean(ytt[labels[0]])+np.mean(ytt[labels[2]]))/2

            # only pixels with a threshold outside [4000, 10000] are plotted
            if threshold > 10000 or threshold < 4000:
                d3.append({'x': xl,
                          'y': yf,
                          'color': 'k',
                          'linewidth': 1,
                           'y2': (yf-yl)/errors
                         })

            # fit hook slope
            if fit_hook:
                idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
                xh = x[idx]
                yh = y[idx] - offset[pix[0], pix[1], cell, 1]
                errors = np.ones(xh.shape)*noise[pix[0], pix[1], cell, 1]
                # unlike the first-range cell, the start value for m here is |bound_m/10|
                parms = {'m': np.abs(bound_m/10), 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5., 'o': bound-1}
                parms["limit_m"] = [0.3, 1.0]
                parms["limit_c"] = [1., 1000]
                fitted = fit_data(hook_fun, xh, yh, errors, parms)
                yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
                max_devh = np.max(np.abs((yh-yf)/yh))
                #print(fitted)
                if threshold > 10000 or threshold < 4000 or fitted['m'] < 0.2:
                    d3.append({'x': xh,
                              'y': yf,
                              'color': 'red',
                              'linewidth': 1,
                              'y2': (yf-yh)/errors
                             })


            if threshold > 10000 or threshold < 4000:
                for i, lbl in enumerate(labels):

                    d2.append({'x': xtt[lbl],
                      'y': ytt[lbl],
                      'marker': markers[i],
                      'color': colors[i],
                      'lw': None

                     })

                    # horizontal threshold line; appended once per label
                    d2.append({'x': np.array([xtt[0], xtt[-1]]),
                      'y': np.ones(2)*threshold,

                      'color': 'k',
                      'lw': 1

                     })

            #threshold = (np.min(y[x<bound]) + np.max(y[x>=bound]))/2


    # one set of three figures per module
    fig = xana.simplePlot(d, y_label="PC pixel signal (ADU)", figsize='2col', aspect=2,
                         x_label="step #")
    fig.savefig("{}/module_{}_pixel_plot_fail.png".format(out_folder, modules[mod]))

    fig = xana.simplePlot(d2, y_label="PC gain signal (ADU)", figsize='2col', aspect=2,
                         x_label="step #")
    fig.savefig("{}/module_{}_pixel_plot_gain_fail.png".format(out_folder, modules[mod]))

    fig = xana.simplePlot(d3, secondpanel=True, y_label="PC signal (ADU)", figsize='2col', aspect=2,
                         x_label="step #", y2_label="Residuals ($\sigma$)", y2_range=(-5,5))
    fig.savefig("{}/module_{}_pixel_plot_fits_fail.png".format(out_folder, modules[mod]))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Here we perform the calculations in column-parallel for all modules
 def calibrate_single_row(cells, fit_hook, inp):
    """Fit the linear and, optionally, the hook function for every column of one row.

    All imports and helpers are defined inside the function body, so the
    function does not rely on names from the calling namespace.

    :param cells: not used inside this function.
    :param fit_hook: if true, the hook function is fitted in addition to the
        linear function.
    :param inp: tuple ``(yrd, yra, offset, noise)``. ``yrd`` and ``yra`` are
        indexed as ``[scan point, column]``; ``offset`` and ``noise`` are
        indexed as ``[column, 0 or 1]``.
    :return: ``thresh, (ml, bl, devl), (mh, bh, ah, oh, ch, devh), failures, dhm``.
        All arrays are NaN where nothing was computed; ``thresh`` has a trailing
        axis of length 3 (threshold, mean of region 0, mean of region 2);
        ``failures`` is a list of ``(column, error message)`` tuples.
    """

    from sklearn.cluster import KMeans
    from iminuit import Minuit
    from iminuit.util import make_func_code, describe
    import numpy as np

    yrd, yra, offset, noise = inp

    def rolling_window(a, window):
        # Strided view of `a` with an extra trailing axis of length `window`
        # holding the overlapping windows along the last axis (no copy).
        shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
        strides = a.strides + (a.strides[-1],)
        return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)


    def calc_m_cluster2(x, y, r1=5, r2=0, r3=1.5):
        # Split the scan into four regions according to the local slope
        # (slope > r1, slope < r2, r2 < slope < r3, everything else) and return
        # (mean slope per region, boolean mask per region, None).
        scan_range = 15
        ms = np.zeros((x.shape[0], scan_range))
        # slope between each point and the points 1..scan_range positions before
        # it (np.roll wraps around at the array start)
        for i in range(scan_range):
            xdiffs = x - np.roll(x, i+1)
            ydiffs = y - np.roll(y, i+1)
            m = ydiffs/xdiffs
            ms[:,i] = m
        m = np.mean(ms, axis=1)
        mm = np.zeros_like(m)
        mm[...] = np.nan
        # smooth the central part with a rolling mean; the edges keep their raw value
        m[scan_range//2:-scan_range//2+1] = np.mean(rolling_window(m, scan_range),-1)
        reg1 = m > r1
        reg2 = m < r2
        reg3 = (m > r2) & (m < r3)
        reg4 = ~(reg1 | reg2 | reg3)
        labels = [reg1, reg2, reg3, reg4]
        regions = np.zeros_like(x, np.uint8)
        for r, lbl in enumerate(labels):
            regions[lbl] = r
        # smooth the region index with a rolling mean over 30 points, rounded
        scan_range = 30
        mregions = np.round(np.mean(rolling_window(regions, scan_range),-1))
        # NOTE(review): `regions` is uint8, which cannot represent NaN; confirm
        # what this assignment yields on the NumPy version in use. Only the edge
        # entries keep that value, the centre is overwritten on the next line.
        regions[...] = np.nan
        regions[scan_range//2:-scan_range//2+1] = mregions


        labels = [regions == 0, regions == 1, regions == 2, regions == 3]

        idx = np.arange(x.size)
        maxlbl = x.size-1
        # drop points of region i that lie beyond the last point of region i+1
        for i in range(0, len(labels)-1):
            nidx = labels[i+1]
            if np.any(nidx):
                maxlbl = np.max(idx[nidx])
                cidx = idx > maxlbl
                if np.any(cidx):
                    labels[i][cidx] = False

        # mean of all pairwise slopes inside each region
        ms = []
        for lbl in labels:
            xl = x[lbl]
            xd = np.reshape(xl, (len(xl), 1))
            xdiff = xd - xd.transpose()

            yl = y[lbl]
            yd = np.reshape(yl, (len(yl), 1))
            ydiff = yd - yd.transpose()
            ms.append(np.mean(np.nanmean(ydiff/xdiff, axis=0)))

        return ms, labels, None

    def fit_data(fun, x, y, yerr, par_ests):
        # Minimise the cost functor below with Minuit and return the fitted
        # values. `par_ests` (start values/limits) is modified in place: three
        # Minuit options are added to it before it is passed on as keywords.
        par_ests["throw_nan"] = False
        par_ests["pedantic"] = False
        par_ests["print_level"] = 0

        f_sig = describe(fun)[1:]

        class _Chi2Functor:
            def __init__(self, f, x, y, err):
                self.f = f
                self.x = x
                self.y = y
                self.err = err
                f_sig = describe(f)
                # this is how you fake function
                # signature dynamically
                self.func_code = make_func_code(
                    f_sig[1:])  # docking off independent variable
                self.func_defaults = None  # this keeps numpy.vectorize happy

            def __call__(self, *arg):
                # notice that it accept variable length
                # positional arguments
                # chi2 = sum((y-self.f(x,*arg))**2 for x,y in zip(self.x,self.y))
                # NOTE(review): the squared residual is divided by err, not by
                # err**2 -- confirm this weighting is intended.
                return np.sum(((self.f(self.x, *arg) - self.y) ** 2) / self.err)

        wrapped = _Chi2Functor(fun, x, y, yerr)
        m = Minuit(wrapped, **par_ests)
        fmin = m.migrad()

        return m.values

    def lin_fun(x, m, b):
        return m*x+b

    def hook_fun(x, a, c, o, m, b):
        # exponential term plus a linear term
        return a*np.exp(-(x-o)/c)+m*x+b

    # linear slope
    ml = np.zeros(yrd.shape[1:])
    bl = np.zeros(yrd.shape[1:])
    devl = np.zeros(yrd.shape[1:])
    ml[...] = np.nan
    bl[...] = np.nan
    devl[...] = np.nan

    #hook function
    mh = np.zeros(yrd.shape[1:])
    bh = np.zeros(yrd.shape[1:])
    ch = np.zeros(yrd.shape[1:])
    oh = np.zeros(yrd.shape[1:])
    ah = np.zeros(yrd.shape[1:])
    devh = np.zeros(yrd.shape[1:])
    dhm = np.zeros(yrd.shape[1:])
    mh[...] = np.nan
    bh[...] = np.nan
    ch[...] = np.nan
    oh[...] = np.nan
    ah[...] = np.nan
    devh[...] = np.nan
    dhm[...] = np.nan

    # threshold
    thresh = np.zeros(list(yrd.shape[1:])+[3,])
    thresh[...] = np.nan
    failures = []

    for col in range(yrd.shape[-1]):
        # any error in a column is recorded in `failures`; results already
        # written for that column stay, the rest remains NaN
        try:
            y = yrd[:,col]
            x = np.arange(y.shape[0])

            # only scan points above 1000 and finite are used
            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]

            ms, labels, centers = calc_m_cluster2(x, y)

            # the first scan point of region 1 separates the two fit ranges
            bound = np.min(x[labels[1]])
            bound_m = ms[1]

            # fit linear slope
            xl = x[x<bound]
            yl = y[x<bound] - offset[col, 0]
            errors = np.ones(xl.shape)*noise[col, 0]
            parms = {'m': bound_m, 'b': np.min(yl)}
            fitted = fit_data(lin_fun, xl, yl, errors, parms)
            yf = lin_fun(xl, fitted['m'], fitted['b'])
            # despite the name, this is the median absolute relative deviation
            max_devl = np.median(np.abs((yl-yf)/yl))
            ml[col] = fitted['m']
            bl[col] = fitted['b']
            devl[col] = max_devl
            #if np.any(labels[0]) and np.any(labels[2]):
                #dhm[col] = y[labels[0]].max()-y[labels[2]].min()
            dhml = lin_fun(bound, fitted['m'], fitted['b'])
            # fit hook slope
            if fit_hook:
                idx = (x >= bound) & (y > 0) & np.isfinite(x) & np.isfinite(y)
                xh = x[idx]
                yh = y[idx] - offset[col, 1]
                errors = np.ones(xh.shape)*noise[col, 1]
                # start value for m is bound_m/10, replaced by 0.5 when that is not above 0.3
                parms = {'m': bound_m/10 if bound_m/10 > 0.3 else 0.5, 'b': np.min(yh[yh > 0]), 'a': np.max(yh), 'c': 5., 'o': bound-1}
                parms["limit_m"] = [0.3, 1.0]
                parms["limit_c"] = [1., 1000]
                fitted = fit_data(hook_fun, xh, yh, errors, parms)
                yf = hook_fun(xh, fitted['a'], fitted['c'], fitted['o'], fitted['m'], fitted['b'])
                max_devh = np.median(np.abs((yh-yf)/yh))

                mh[col] = fitted['m']
                bh[col] = fitted['b']
                ah[col] = fitted['a']
                oh[col] = fitted['o']
                ch[col] = fitted['c']
                devh[col] = max_devh
                # dhm stores the boundary scan point (only when fit_hook is set)
                dhm[col] = bound #(dhml) - lin_fun(bound, fitted['m'], fitted['b'])

            # from here on x/y refer to the second input array (yra)
            y = yra[:,col]
            x = np.arange(y.shape[0])

            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]

            # NOTE(review): `labels` were computed on the filtered yrd data and
            # are applied to yra filtered with its own mask; this assumes both
            # selections have the same length -- confirm.
            threshold = (np.mean(y[labels[0]])+np.mean(y[labels[2]]))/2
            thresh[col,0] = threshold
            thresh[col,1] = np.mean(y[labels[0]])
            thresh[col,2] = np.mean(y[labels[2]])
        except Exception as e:
            print(e)
            failures.append((col, str(e)))
    del yrd
    del yra
    return thresh, (ml, bl, devl), (mh, bh, ah, oh, ch, devh), failures, dhm

 # Fit every pixel of every memory cell of every module. Each detector row of
 # a memory cell is one task handed to the parallel view; the per-row results
 # are copied back into per-module arrays and collected in `fres`, keyed by
 # the "Q<n>M<n>" module name.
 start = datetime.now()
 fres = {}
 failures = []
 for i, r in enumerate(res):
    offset, noise, ii = offsets[i], noises[i], modules[i]
    qm = "Q{}M{}".format(ii//4+1, ii%4+1)
    dig, ana, cellId = r

    # every result array has one entry per (memory cell, row, column)
    pix_shape = dig.shape[1:]

    # linear slope
    ml, bl, devl = (np.zeros(pix_shape) for _ in range(3))

    # hook function
    mh, bh, ch, oh, ah, devh, dhma = (np.zeros(pix_shape) for _ in range(7))

    # threshold, and the two values it is the mean of
    thresh = np.zeros(list(pix_shape))
    thresh_bounds = np.zeros(list(pix_shape) + [2])

    for cell in range(dig.shape[1]):
        # one input tuple per detector row of this memory cell
        inp = [(dig[:, cell, j, :], ana[:, cell, j, :], offset[j, :, cell, :], noise[j, :, cell, :])
               for j in range(dig.shape[2])]

        p = partial(calibrate_single_row, cells, fit_hook)
        frs = view.map_sync(p, inp)
        #frs = list(map(p, inp))

        for j, fr in enumerate(frs):
            threshr, (mlr, blr, devlr), (mhr, bhr, ahr, ohr, chro, devhr), fails, dhm = fr
            failures.append(fails)

            # copy the row results into the per-module arrays
            for target, row in ((ml, mlr), (bl, blr), (devl, devlr),
                                (mh, mhr), (bh, bhr), (oh, ohr),
                                (ch, chro), (ah, ahr), (devh, devhr),
                                (dhma, dhm)):
                target[cell, j, :] = row

            thresh[cell, j, ...] = threshr[..., 0]
            thresh_bounds[cell, j, ...] = threshr[..., 1:]

    fres[qm] = {'ml': ml, 'bl': bl, 'devl': devl,
                'tresh': thresh, 'tresh_bounds': thresh_bounds,
                'dhm': dhma}
    if fit_hook:
        fres[qm].update({'mh': mh, 'bh': bh, 'oh': oh,
                         'ch': ch, 'ah': ah, 'devh': devh})

 duration = (datetime.now()-start).total_seconds()
 logger.runtime_summary_entry(success=True, runtime=duration)
 logger.send()
 ```

 %% Cell type:markdown id: tags:

 The results of the slope fitting from the PC runs are stored in a single array.
 The individual values are distinguished on axis 0 by index:

 0: linear slope - m value
 1: linear slope - b value
 2: linear slope - deviation
 3: hook function - m value
 4: hook function - b value
 5: hook function - o value
 6: hook function - c value
 7: hook function - a value
 8: hook function - deviation

 %% Cell type:code id: tags:

 ``` python
 def slope_dict_to_arr(d):
    """Stack the fit results of one module into a single float32 array.

    Axis 0 of the returned array has length 11. Slots 0-9 are filled from the
    dictionary entries named in ``slot_names`` (in that order); entries that
    are missing from ``d`` leave their slot at zero, and slot 10 is never
    written. Keys of ``d`` that are not listed are ignored. The remaining axes
    take the shape of ``d["ml"]``.
    """
    slot_names = ("ml", "bl", "devl",
                  "mh", "bh", "oh", "ch", "ah", "devh",
                  "tresh")
    stacked = np.zeros((11,) + tuple(d["ml"].shape), dtype=np.float32)
    for slot, name in enumerate(slot_names):
        if name in d:
            stacked[slot, ...] = d[name]
    return stacked
 ```

 %% Cell type:code id: tags:

 ``` python
 from collections import OrderedDict
 # Build one uint32 bad-pixel mask per module from the fit results. A mask has
 # the shape of the 'ml' array, i.e. one entry per fitted pixel.
 bad_pixels = OrderedDict()
 for qm, data in fres.items():
    mask = np.zeros(data['ml'].shape, np.uint32)
    # The gain threshold is stored per pixel, with the same shape as the mask.
    # Indexing it with [..., 0] would test only the first column of each row
    # and flag the whole row; compare the full array instead. A trailing
    # component axis (if a store provides one) is still reduced to its first
    # entry.
    tresh = data['tresh']
    if tresh.ndim > mask.ndim:
        tresh = tresh[..., 0]
    mask[(tresh < 50) | (tresh > 8500)] |= BadPixels.CI_GAIN_OF_OF_THRESHOLD.value
    # a deviation of exactly zero, or a relative deviation above 0.5, marks a bad linear fit
    mask[(data['devl'] == 0)] |= BadPixels.CI_LINEAR_DEVIATION.value
    mask[(np.abs(data['devl']) > 0.5)] |= BadPixels.CI_LINEAR_DEVIATION.value
    # non-finite deviation: no usable fit result for this pixel
    mask[(~np.isfinite(data['devl']))] |= BadPixels.CI_EVAL_ERROR.value
    bad_pixels[qm] = mask
 ```

 %% Cell type:code id: tags:

 ``` python
 # Write all fit results and the bad-pixel mask of every module to one HDF5
 # file in out_folder; datasets are stored as /<module>/<key>/0/data.
 if local_output:
    ofile = "{}/agipd_pc_store_{}_{}_{}.h5".format(out_folder, "_".join([str(run) for run in runs]), modules[0], modules[-1])
    # the context manager closes the file even if writing a dataset fails
    with h5py.File(ofile, "w") as store_file:
        for qm, r in fres.items():
            for key, item in r.items():
                store_file["/{}/{}/0/data".format(qm, key)] = item
            store_file["/{}/{}/0/data".format(qm, "BadPixelsPC")] = bad_pixels[qm]
 ```

 %% Cell type:code id: tags:

 ``` python
 # The proposal is the second-to-last non-empty component of in_folder;
 # file_loc is that component followed by the space-separated run numbers.
 path_parts = [part for part in in_folder.strip('/').split('/') if part]
 proposal = path_parts[-2]
 file_loc = proposal + ' ' + ' '.join(str(run) for run in runs)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Send two constants per module to the calibration database: the stacked
 # slope array (SlopesPC) and the bad-pixel mask (BadPixelsPC), in that order.
 if db_output:
    for qm, r in fres.items():
        for const_name in ("SlopesPC", "BadPixelsPC"):
            metadata = ConstantMetaData()
            constant = getattr(Constants.AGIPD, const_name)()
            if const_name == "SlopesPC":
                constant.data = slope_dict_to_arr(r)
            else:
                constant.data = bad_pixels[qm]
            metadata.calibration_constant = constant

            # set the operating condition
            condition = Conditions.Dark.AGIPD(memory_cells=maxcells, bias_voltage=bias_voltage,
                                              acquisition_rate=acq_rate, gain_setting=gain_setting)
            metadata.detector_condition = condition

            # specify the version for this constant: valid from now on unless
            # a creation time is given
            if creation_time is None:
                version = Versions.Now(device=getattr(Detectors.AGIPD1M1, qm))
            else:
                version = Versions.Timespan(device=getattr(Detectors.AGIPD1M1, qm),
                                            start=creation_time)
            metadata.calibration_constant_version = version

            metadata.calibration_constant_version.raw_data_location = file_loc
            metadata.send(cal_db_interface)
 ```

 %% Cell type:markdown id: tags:

 ## Overview Plots ##

 Each of the following plots represents one of the fit parameters of a single memory cell on a module (cell 59, or the last available cell if fewer cells are present):

 For the linear function of the high gain region

   $$y = mx + b$$

 * ml denotes the $m$ parameter
 * bl denotes the $b$ parameter
 * devl denotes the absolute relative deviation from linearity.

 For the composite function of the medium gain and transition region

  $$y = A*e^{-(x-O)/C}+mx+b$$

 * oh denotes the $O$ parameter
 * ch denotes the $C$ parameter
 * mh denotes the $m$ parameter
 * bh denotes the $b$ parameter
 * devh denotes the absolute relative deviation from the linear part of the function.

 Additionally, the thresholds and bad pixels (mask) are shown.

 Finally, the red and white rectangles indicate the first and second test pixel ranges, respectively.

 %% Cell type:code id: tags:

 ``` python
 import matplotlib.pyplot as plt
 from mpl_toolkits.axes_grid1 import AxesGrid
 import matplotlib.patches as patches

 # One figure per module: an image of every fit-result array for a single
 # memory cell, plus the bad-pixel mask in the last panel of the grid.
 cell_to_preview = min(59, mem_cells-1)
 n_rows = 7 if fit_hook else 3
 for module, data in fres.items():
    fig = plt.figure(figsize=(20,20))
    grid = AxesGrid(fig, 111,
                    nrows_ncols=(n_rows, 2),
                    axes_pad=(0.9, 0.15),
                    label_mode="1",
                    share_all=True,
                    cbar_location="right",
                    cbar_mode="each",
                    cbar_size="7%",
                    cbar_pad="2%",
                    )

    mask = bad_pixels[module]

    i = 0
    for key, citem in data.items():
        # work on a copy with non-finite entries zeroed
        item = citem.copy()
        item[~np.isfinite(item)] = 0
        med = np.nanmedian(item)

        # relative half-width of the colour range around the median; its sign
        # follows the sign of the median
        bound = -0.1 if med < 0 else 0.1
        maxcnt = 10
        # double the range until at most 1% of the values fall outside it,
        # giving up after a fixed number of doublings
        while True:
            outside = (item < med-bound*med) | (item > med+bound*med)
            if not np.count_nonzero(outside)/item.size > 0.01:
                break
            bound *= 2
            maxcnt -= 1
            if maxcnt < 0:
                break

        # arrays whose key contains "bounds" show only their first component
        if "bounds" in key:
            d = item[cell_to_preview,...,0]
        else:
            d = item[cell_to_preview,...]
        im = grid[i].imshow(d, interpolation="nearest",
                            vmin=med-bound*med, vmax=med+bound*med)
        cb = grid.cbar_axes[i].colorbar(im)

        # outline the first (red) and second (white) test-pixel range
        for tpix_range, edge in ((tpix_range1, "red"), (tpix_range2, "white")):
            x0, x1 = tpix_range[0][0], tpix_range[0][1]
            y0, y1 = tpix_range[1][0], tpix_range[1][1]
            p = patches.Rectangle(
                (x0, y0), x1-x0, y1-y0, fill=False, color=edge)
            grid[i].add_patch(p)

        grid[i].text(20, 50, key, color="w", fontsize=50)

        i += 1

    # the bad-pixel mask always goes into the last panel
    im = grid[-1].imshow(mask[cell_to_preview,...], interpolation="nearest",
                         vmin=0, vmax=1)
    cb = grid.cbar_axes[-1].colorbar(im)

    grid[-1].text(20, 50, "mask", color="w", fontsize=50)
    fig.savefig("{}/module_{}_PC.png".format(out_folder, module))
 ```

 %% Cell type:markdown id: tags:

 ### Memory Cell dependent behavior of thresholding ###

 %% Cell type:code id: tags:

 ``` python
 # For every module and each selected quantity, draw a 2D histogram: one
 # column per memory cell, value bins spanning 0.5x to 1.5x the median of the
 # quantity, counts on a logarithmic colour scale.
 toplot = {"tresh": "Gain theshold (ADU)",
           "ml": "Slope (linear)",
           "bl": "Offset (linear) (ADU)"}
 from matplotlib.colors import LogNorm, PowerNorm
 for module, data in fres.items():

    bins = 100

    for typ, label in toplot.items():
        mask = bad_pixels[module]
        thresh = data[typ]

        # histogram range around the median, ordered low to high
        center = np.nanmedian(thresh)
        hrange = [0.5*center, 1.5*center]
        if hrange[1] < hrange[0]:
            hrange = hrange[::-1]

        # one histogram row per memory cell
        r_hist = np.zeros((mem_cells, bins))
        for c in range(mem_cells):
            d = thresh[c,...]
            h, e = np.histogram(d.flatten(), bins=bins, range=hrange)
            r_hist[c, :] = h

        fig = plt.figure(figsize=(5,5))
        ax = fig.add_subplot(111)
        # transpose so memory cells run along x, then flip the value axis
        im = ax.imshow(np.flipud(r_hist.T), interpolation="nearest",
                       aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(r_hist)),
                       extent=[0, mem_cells, hrange[0], hrange[1]])
        ax.set_xlabel("Memory cell")
        ax.set_ylabel(label)
        cb = fig.colorbar(im)
        cb.set_label("Counts")
 ```

 %% Cell type:markdown id: tags:

 ## Global Bad Pixel Behaviour ##

 The following plots show the results of the bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned by a factor of 2. This rebinning suppresses isolated single bad pixels. Hence, only bad pixels spanning at least 2 pixels in the x/y-plane, or at least two memory cells, are indicated. Colors encode the bad pixel type, or a mixed type.

 %% Cell type:code id: tags:

 ``` python
 # Map bad-pixel bit patterns to a (legend name, RGBA hex colour) pair and
 # draw one 3D bad-pixel plot per module.
 gain_flag = BadPixels.CI_GAIN_OF_OF_THRESHOLD
 eval_flag = BadPixels.CI_EVAL_ERROR
 cols = {
    gain_flag.value: (gain_flag.name, '#FF000080'),
    eval_flag.value: (eval_flag.name, '#0000FF80'),
    gain_flag.value | BadPixels.OFFSET_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80'),
 }

 # rebin by 2 unless the high-resolution plot was requested
 rebin = 1 if high_res_badpix_3d else 2

 gain = 0
 for mod, data in bad_pixels.items():
    # move the first axis of the mask to the third position for plotting
    plot_badpix_3d(np.moveaxis(data, 0, 2), cols, title=mod, rebin_fac=rebin, azim=60.)
 ```

 %% Cell type:code id: tags:

 ``` python
 one_photon = 55 # ADU
 test_pixels = []
 tpix_range1 = [(0,8), (0,8)]
 for i in range(*tpix_range1[0]):
    for j in range(*tpix_range1[1]):
        test_pixels.append((j,i))
 test_cells = [4, 38, 64, 128, 200, 249]
 tcell = np.array(test_cells)
 tcell = tcell[tcell < mem_cells]
 if tcell.size == 0:
    test_cells = [mem_cells-1]
 else:
    test_cells = tcell.tolist()
 from mpl_toolkits.axes_grid1 import ImageGrid
 for mod, r in enumerate(res):
    dig, ana, cellId = r
    d = []
    d2 = []
    d3 = []

    H = [0, 0, 0, 0]
    H2 = [0, 0, 0, 0]
    Ha = [0, 0, 0, 0]
    ii = modules[mod]
    qm = "Q{}M{}".format(ii//4+1, ii%4+1)
    cdata = fres[qm]
    ex, ey, ea = None, None, None
    medml = np.nanmean(cdata['ml'])
    medmh = np.nanmean(cdata['mh'][cdata['mh']> 0.5])
    offset = offsets[mod]
    threshold = thresholds[mod]

    medth = np.nanmean(threshold[...,0])
    for pix in test_pixels:
        for cell in test_cells:
            color = np.random.rand(3,1)

            x = np.arange(dig.shape[0])
            y = dig[:,cell, pix[0], pix[1]]
            a = ana[:,cell, pix[0], pix[1]]

            vidx = (y > 1000) & np.isfinite(y)
            x = x[vidx]
            y = y[vidx]
            a = a[vidx]

            ms, labels, centers = calc_m_cluster2(x, y)
            bound = None
            bound_m = None
            markers = ['o','.','x','v']
            colors = ['b', 'r', 'g', 'k']
            ymin = y.min()

            amin = a[labels[2]].min()
            for i, lbl in enumerate(labels):

                if np.any(lbl):
                    if i == 0:
                        cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)

                        o = offset[pix[0], pix[1], cell, 0]
                        ym = (y[lbl]-o)/cm

                    elif i >= 1:
                        mh = cdata['mh'][cell, pix[0], pix[1]]
                        ml = cdata['ml'][cell, pix[0], pix[1]]
                        cml = ml/medml
                        cmh = mh/medmh
                        cm = medml/medmh
                        oh = cdata['bh'][cell, pix[0], pix[1]]
                        o = offset[pix[0], pix[1], cell, 1] + oh

                        ym = (y[lbl]-o)/cmh*cm

                        if i == 1:
                            ah = cdata['ah'][cell, pix[0], pix[1]]
                            ch = cdata['ch'][cell, pix[0], pix[1]]
                            ohh = cdata['oh'][cell, pix[0], pix[1]]
                            tx = ch * np.log(ah/(y[lbl]-o))+ohh

                            chook  = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm

                            ym -= chook

                    h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
                    H[i] += h

            labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
            for i, lbl in enumerate(labels):

                if np.any(lbl):
                    if i == 0:
                        cm = (cdata['ml'][cell, pix[0], pix[1]]/medml)

                        o = offset[pix[0], pix[1], cell, 0]
                        ym = (y[lbl]-o)/cm

                    elif i >= 1:
                        mh = cdata['mh'][cell, pix[0], pix[1]]
                        ml = cdata['ml'][cell, pix[0], pix[1]]
                        cml = ml/medml
                        cmh = mh/medmh
                        cm = medml/medmh
                        oh = cdata['bh'][cell, pix[0], pix[1]]
                        o = offset[pix[0], pix[1], cell, 1] + oh

                        ym = (y[lbl]-o)/cmh*cm

                        if i == 1:
                            ah = cdata['ah'][cell, pix[0], pix[1]]
                            ch = cdata['ch'][cell, pix[0], pix[1]]
                            ohh = cdata['oh'][cell, pix[0], pix[1]]
                            tx = ch * np.log(ah/(y[lbl]-o))+ohh

                            chook  = (ah*np.exp(-(tx-ohh)/ch) - mh*tx)/cmh*cm
                            idx = (a[lbl]-amin) < 0
                            ym[idx] -= chook[idx]

                            #ym = a[lbl]-amin

                    h, ex, ey = np.histogram2d(x[lbl], ym/one_photon, range=((0, 600), (0, 15000/one_photon)), bins=(300, 600))
                    H2[i] += h

            labels = [a < threshold[pix[0], pix[1], cell,0], a >= threshold[pix[0], pix[1], cell,0]]
            for i, lbl in enumerate(labels):

                if np.any(lbl):

                    #if i == 0:
                    #    amin = a[lbl].min()
                    #else:
                    #    amin = a[labels[0]].min() #a[labels[1]].min()# /(threshold[pix[0], pix[1], cell,0]/medth)
                    am = a[lbl] - amin
                    h, ex, ea = np.histogram2d(x[lbl], am, range=((0, 600), (-100, 5000)), bins=(300, 400))
                    Ha[i] += h



    # Draw the accumulated 2D histograms as three stacked panels:
    #   311 - H  (three overlaid selections),
    #   312 - H2 (two overlaid selections, filled by the correction loop),
    #   313 - Ha (two overlaid selections, filled by the threshold-split loop).
    # The bin edges (ex, ey, ea) come from the last np.histogram2d calls.
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(311)
    for i in range(3):
        # Replace empty bins by NaN so that imshow does not paint them and the
        # overlaid layers stay visible.
        H[i][H[i]==0] = np.nan
    # NOTE: imshow only accepts origin="upper" or origin="lower". The value
    # "bottom" used previously is rejected by newer Matplotlib releases;
    # "lower" gives the intended orientation (first row at the bottom).
    ax.imshow(H[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
    ax.imshow(H[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='spring', alpha=0.7, vmin=0, vmax=100)
    ax.imshow(H[2].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
    ax.set_ylabel("AGIPD response (ADU)")
    ax.set_xlabel("PC scan point (#)")

    # Reference line medml*x/one_photon with a +/- sqrt band around it.
    x = np.arange(0, 600)
    ideal = medml*x/one_photon
    ax.plot(x, ideal, color='red')
    ax.plot(x, ideal + np.sqrt(ideal), color='red')
    ax.plot(x, ideal - np.sqrt(ideal), color='red')


    ax = fig.add_subplot(312)
    for i in range(2):
        H2[i][H2[i]==0] = np.nan
    ax.imshow(H2[0].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
    ax.imshow(H2[1].T, origin="lower", extent=[ex[0], ex[-1], ey[0], ey[-1]],
              aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
    ax.set_ylabel("AGIPD response (ADU)")
    ax.set_xlabel("PC scan point (#)")

    # Same reference line and band as in the first panel.
    x = np.arange(0, 600)
    ideal = medml*x/one_photon
    ax.plot(x, ideal, color='red')
    ax.plot(x, ideal + np.sqrt(ideal), color='red')
    ax.plot(x, ideal - np.sqrt(ideal), color='red')


    ax = fig.add_subplot(313)
    for i in range(2):
        Ha[i][Ha[i]==0] = np.nan
    # This panel uses the `ea` edges for its vertical extent.
    ax.imshow(Ha[0].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
              aspect='auto', cmap='summer', alpha=0.7, vmin=0, vmax=1000)
    ax.imshow(Ha[1].T, origin="lower", extent=[ex[0], ex[-1], ea[0], ea[-1]],
              aspect='auto', cmap='winter', alpha=0.7, vmin=0, vmax=1000)
    ax.set_ylabel("AGIPD gain (ADU)")
    ax.set_xlabel("PC scan point (#)")

 ```