Compare revisions

Mikhail Karnevskiy · Mikhail Karnevskiy · Karim Ahmed · Karim Ahmed · Mikhail Karnevskiy · Mikhail Karnevskiy
--- a/cal_tools/cal_tools/tools.py
+++ b/cal_tools/cal_tools/tools.py
@@ -94,7 +94,7 @@ def map_modules_from_folder(in_folder, run, path_template, karabo_da,
                module_files[name].put(abs_fname)
                total_sequences += 1
                sequences_qm[name] += 1
-                total_file_size += path.getsize(filename)
+                total_file_size += path.getsize(abs_fname)

    return (module_files, mod_ids, total_sequences,
            sequences_qm, total_file_size)

--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 1.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
+cluster_profile = "noDB"
 in_folder = "/gpfs/exfel/exp/MID/201931/p900107/raw" # the folder to read data from, required
+out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_Corr"  # the folder to output to, required
+sequences =  [-1] # sequences to correct, set to -1 for all, range allowed
+modules = [-1] # modules to correct, set to -1 for all, range allowed
 run = 11 # runs to process, required
-out_folder =  "/gpfs/exfel/data/scratch/karnem/test/AGIPD_Corr"  # the folder to output to, required
+
+karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
+karabo_da = [-1]  # data aggregators
+receiver_id = "{}CH0" # inset for receiver devices
+path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
+h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
+h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
+h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
+karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
+karabo_da_control = 'DA02' # karabo DA for control infromation
+
+use_dir_creation_date = True # use the creation data of the input dir for database queries
+cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
+cal_db_timeout = 30000 # in milli seconds
+creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
+
 calfile =  "" # path to calibration file. Leave empty if all data should come from DB
-sequences =  [-1] # sequences to correct, set to -1 for all, range allowed
+nodb = False # if set only file-based constants will be used
 mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
+bias_voltage = 300
+acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
+gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
+photon_energy = 9.2 # photon energy in keV
 interlaced = False # whether data is in interlaced layout
 overwrite = True # set to True if existing data should be overwritten
-cluster_profile = "noDB"
 max_pulses = [0, 500, 1] # range list [st, end, step] of maximum pulse indices. 3 allowed maximum list input elements.
 local_input = False
-bias_voltage = 300
-cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
-use_dir_creation_date = True # use the creation data of the input dir for database queries
 sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
-photon_energy = 9.2 # photon energy in keV
 index_v = 2 # version of RAW index type
-nodb = False # if set only file-based constants will be used
 blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
-corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted
 melt_snow = "" # if set to "none" snowy pixels are identified and resolved to NaN, if set to "interpolate", the value is interpolated from neighbouring pixels
-cal_db_timeout = 30000 # in milli seconds
 max_cells_db_dark = 0  # set to a value different than 0 to use this value for dark data DB queries
 max_cells_db = 0 # set to a value different than 0 to use this value for DB queries
 chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.
-creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
-instrument = "MID"  # the instrument the detector is installed at, required
 force_hg_if_below = 1000 # set to a value other than 0 to force a pixel into high gain if it's high gain offset subtracted value is below this threshold
 force_mg_if_below = 1000 # set to a value other than 0 to force a pixel into medium gain if it's medium gain offset subtracted value is below this threshold
 mask_noisy_adc = 0.25 # set to a value other than 0 and below 1 to mask entire ADC if fraction of noisy pixels is above
-acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
-gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
-
-h5path_ctrl = '/CONTROL/SPB_IRU_AGIPD1M1/MDL/FPGA_COMP_TEST' # path to control information
-karabo_da_control = 'DA02' # karabo DA for control infromation

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 rel_gain = False # do relative gain correction based on PC data
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 dont_zero_nans = False # do not zero NaN values in corrected data
 dont_zero_orange = False # do not zero very negative and very large values
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
+corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted

-def balance_sequences(in_folder, run, sequences, sequences_per_node):
+def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
-    return bs(in_folder, run, sequences, sequences_per_node, "AGIPD*")
+    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Fill dictionaries comprising bools and arguments for correction and data analysis

 # Here the herarichy and dependability for correction booleans are defined
 corr_bools = {}

 # offset is at the bottom of AGIPD correction pyramid.
 corr_bools["only_offset"] = only_offset

 # Dont apply any corrections if only_offset is requested
 if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["dont_zero_nans"] = dont_zero_nans
    corr_bools["dont_zero_orange"] = dont_zero_orange

 # Here the herarichy and dependability for data analysis booleans and arguments are defined
 data_analysis_parms = {}
 ```

 %% Cell type:code id: tags:

 ``` python
 import sys
 from collections import OrderedDict

 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 import os
 import h5py
 import numpy as np
 import matplotlib
 matplotlib.use("agg")
 import matplotlib.pyplot as plt
 from ipyparallel import Client
-print("Connecting to profile {}".format(cluster_profile))
+print(f"Connecting to profile {cluster_profile}")
 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
-from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name,
+from cal_tools.tools import (map_modules_from_folder, parse_runs, run_prop_seq_from_path, get_notebook_name,
                                 get_dir_creation_date, get_constant_from_db)
 from cal_tools.agipdlib import get_gain_setting
 from dateutil import parser
 from datetime import timedelta

 il_mode = interlaced
 max_cells = mem_cells
 gains = np.arange(3)
 cells = np.arange(max_cells)

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    offset = parser.parse(creation_date_offset)
    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
    creation_time += delta
-    print("Using {} as creation time".format(creation_time))
-
-in_folder = "{}/r{:04d}".format(in_folder, run)
+    print(f"Using {creation_time} as creation time")

-print("Working in IL Mode: {}. Actual cells in use are: {}".format(il_mode, max_cells))
+print(f"Working in IL Mode: {il_mode}. Actual cells in use are: {max_cells}")

 if sequences[0] == -1:
    sequences = None

-QUADRANTS = 4
-MODULES_PER_QUAD = 4
-DET_FILE_INSET = "AGIPD"
 CHUNK_SIZE = 250
 MAX_PAR = 32

 if in_folder[-1] == "/":
    in_folder = in_folder[:-1]
 print("Outputting to {}".format(out_folder))

 if not os.path.exists(out_folder):
    os.makedirs(out_folder)
 elif not overwrite:
    raise AttributeError("Output path exists! Exiting")

 import warnings
 warnings.filterwarnings('ignore')

 from cal_tools.agipdlib import SnowResolution
 melt_snow = False if melt_snow == "" else SnowResolution(melt_snow)

 special_opts = blc_noise_threshold, blc_hmatch, melt_snow

-loc = None
+instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
-    loc = "SPB_DET_AGIPD1M-1"
    dinstance = "AGIPD1M1"
 else:
-    loc = "MID_DET_AGIPD1M-1"
    dinstance = "AGIPD1M2"
-print("Detector in use is {}".format(loc))
+```
+
+%% Cell type:code id: tags:
+
+``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 control_fname = '{}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, run, karabo_da_control)

 if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'Error while reading gain setting from: \n{control_fname}')
            print(e)
            print("Set gain settion to 0")
            gain_setting = 0

-print("Gain setting: {}".format(gain_setting))
+print(f"Gain setting: {gain_setting}")
+print(f"Detector in use is {karabo_id}")
+print(f"Instrument {instrument}")
+print(f"Detector instance {dinstance}")
+
+if karabo_da[0] == -1:
+    if modules[0] == -1:
+        modules = list(range(16))
+    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
+else:
+    modules = [int(x[-2:]) for x in karabo_da]
+print("Process modules: ",
+      ', '.join([f"Q{x // 4 + 1}M{x % 4 + 1}" for x in modules]))
+
+h5path = h5path.format(karabo_id, receiver_id)
+h5path_idx = h5path_idx.format(karabo_id, receiver_id)
+h5path_ctrl = h5path_ctrl.format(karabo_id_control)
 ```

 %% Cell type:code id: tags:

 ``` python
 def combine_stack(d, sdim):
    combined = np.zeros((sdim, 2048, 2048))
    combined[...] = np.nan

    dy = 0
    for i in range(16):

        try:
            if i < 8:
                dx = -512
                if i > 3:
                    dx -= 25
                mx = 1
                my = i % 8
                combined[:, my*128+dy:(my+1)*128+dy,
                         mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,:,::-1]
                dy += 30
                if i == 3:
                    dy += 30

            elif i < 12:
                dx = 80 - 50
                if i == 8:
                    dy = 4*30 + 30 +50 -30

                mx = 1
                my = i % 8 +4
                combined[:, my*128+dy:(my+1)*128+dy,
                         mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
                dy += 30
            else:
                dx = 100 - 50
                if i == 11:
                    dy = 20

                mx = 1
                my = i - 14

                combined[:, my*128+dy:(my+1)*128+dy,
                         mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
                dy += 30
        except:
            continue

    return combined
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
-from queue import Queue
-from collections import OrderedDict
-
-def map_modules_from_files(filelist):
-    module_files = OrderedDict()
-    mod_ids = OrderedDict()
-    total_sequences = 0
-    sequences_qm = {}
-    one_module = None
-    for quadrant in range(0, QUADRANTS):
-        for module in range(0, MODULES_PER_QUAD):
-            name = "Q{}M{}".format(quadrant + 1, module + 1)
-            module_files[name] = Queue()
-            num = quadrant * 4 + module
-            mod_ids[name] = num
-            file_infix = "{}{:02d}".format(DET_FILE_INSET, num)
-            sequences_qm[name] = 0
-            for file in filelist:
-                if file_infix in file:
-                    if not one_module:
-                        one_module = file, num
-                    module_files[name].put(file)
-                    total_sequences += 1
-                    sequences_qm[name] += 1
-
-    return module_files, mod_ids, total_sequences, sequences_qm, one_module
-
-dirlist = sorted(os.listdir(in_folder))
-file_list = []
-
-
-for entry in dirlist:
-    #only h5 file
-    abs_entry = "{}/{}".format(in_folder, entry)
-    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":
-
-        if sequences is None:
-            file_list.append(abs_entry)
-        else:
-            for seq in sequences:
-                if "{:05d}.h5".format(seq) in abs_entry:
-                    file_list.append(os.path.abspath(abs_entry))
-
-mapped_files, mod_ids, total_sequences, sequences_qm, one_module = map_modules_from_files(file_list)
+mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
+mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
 MAX_PAR = min(MAX_PAR, total_sequences)
 ```

 %% Cell type:markdown id: tags:

 ## Processed Files ##

 %% Cell type:code id: tags:

 ``` python
 import copy
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
-print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
+print(f"Processing a total of {total_sequences} sequence files in chunks of {MAX_PAR}")
 table = []
 mfc = copy.copy(mapped_files)
 ti = 0
 for k, files in mfc.items():
    i = 0
    while not files.empty():
        f = files.get()
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
 # restore the queue
-mapped_files, mod_ids, total_sequences, sequences_qm, one_module = map_modules_from_files(file_list)
+mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
+mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
 ```

 %% Cell type:code id: tags:

 ``` python
 import copy
 from functools import partial
 def correct_module(max_cells, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
                   bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
                   bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, chunk_size_idim,
                   special_opts, il_mode, loc, dinstance, force_hg_if_below, force_mg_if_below,
-                   mask_noisy_adc, acq_rate, gain_setting, corr_bools, inp):
+                   mask_noisy_adc, acq_rate, gain_setting, corr_bools, h5path, h5path_idx, inp):
    print("foo")
    import numpy as np
    import copy
    import h5py
    import socket
    from datetime import datetime
    import re
    import os
    from influxdb import InfluxDBClient
    import subprocess
    from cal_tools.enums import BadPixels
    from cal_tools.agipdlib import AgipdCorrections, SnowResolution
    from cal_tools.agipdlib import get_num_cells, get_acq_rate


    #client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')

    def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,
                            total_sequences, success, runtime, reason=""):
        return {
            "measurement": "run_correction",
            "tags": {
                "host": socket.gethostname(),
                "run": run,
                "proposal": proposal,
                "mem_cells": max_cells,
                "sequence": sequence,
                "module": qm,
                "filesize": filesize,
                "chunksize": chunksize,
                "total_sequences": total_sequences,
                "sequences_module": sequences_qm[qm],
                "detector": "AGIPD",
                "instrument": "SPB",

            },
            "time": datetime.utcnow().isoformat(),
            "fields": {
                "success": success,
                "reason": reason,
                "runtime": runtime,
            }
        }

    hists_signal_low = None
    hists_signal_high = None
    hists_gain_vs_signal = None
    hists_dig_gain_vs_signal = None
    hist_pulses = None
    low_edges = None
    high_edges = None
    signal_edges = None
    dig_signal_edges = None
    gain_stats = 0
    when = None
    err = None


    try:
        start = datetime.now()
        success = True
        reason = ""
        filename, filename_out, channel, qm = inp
        print("Have input")

        if max_cells == 0:
            max_cells = get_num_cells(filename, loc, channel)
            if max_cells is None:
                raise ValueError("No raw images found for {}".format(qm))
            else:
                cells = np.arange(max_cells)

        if acq_rate == 0.:
            acq_rate = get_acq_rate(filename, loc, channel)
        else:
            acq_rate = None

        if dbparms[2] == 0:
            dbparms[2] = max_cells
        if dbparms[5] == 0:
            dbparms[5] = dbparms[2]

        print("Set memory cells to {}".format(max_cells))
        print("Set acquistion rate cells to {} MHz".format(acq_rate))

+        # AGIPD correction requires path without the leading "/"
+        if h5path[0] == '/':
+            h5path = h5path[1:]
+        if h5path_idx[0] == '/':
+            h5path_idx = h5path_idx[1:]
+

        infile = h5py.File(filename, "r", driver="core")
        outfile = h5py.File(filename_out, "w")
        try:
            agipd_corr = AgipdCorrections(infile, outfile, max_cells, channel, max_pulses,
                                          bins_gain_vs_signal, bins_signal_low_range,
                                          bins_signal_high_range, bins_dig_gain_vs_signal,
                                          chunk_size_idim=chunk_size_idim,
                                          il_mode=il_mode, raw_fmt_version=index_v,
-                                          h5_data_path="INSTRUMENT/{}/DET/{{}}CH0:xtdf/".format(loc),
-                                          h5_index_path="INDEX/{}/DET/{{}}CH0:xtdf/".format(loc),
+                                          h5_data_path=h5path,
+                                          h5_index_path=h5path_idx,
                                          cal_det_instance=dinstance, force_hg_if_below=force_hg_if_below,
                                          force_mg_if_below=force_mg_if_below, mask_noisy_adc=mask_noisy_adc,
                                          acquisition_rate=acq_rate, gain_setting=gain_setting,
                                          corr_bools=corr_bools)

            blc_noise_threshold, blc_hmatch, melt_snow = special_opts
            if not corr_bools["only_offset"]:
                blc_hmatch = False
                melt_snow = False
            agipd_corr.baseline_corr_noise_threshold = blc_noise_threshold
            agipd_corr.melt_snow = melt_snow
            try:
                agipd_corr.get_valid_image_idx()
            except IOError:
                return
            if not nodb:
                when = agipd_corr.initialize_from_db(dbparms, qm, only_dark=(fileparms != ""))
            if fileparms != "" and not corr_bools["only_offset"]:
                agipd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
            print("Initialized constants")

            for irange in agipd_corr.get_iteration_range():
                agipd_corr.correct_agipd(irange)
                print("Iterated")

            print("All iterations are finished")
            hists, edges = agipd_corr.get_histograms()
            hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal, hist_pulses = hists
            low_edges, high_edges, signal_edges, dig_signal_edges = edges
            gain_stats = np.array(agipd_corr.gain_stats)

        finally:
            outfile.close()
            infile.close()
            print("Closed files")

    except Exception as e:
        print(e)
        err = e
        success = False
        reason = "Error"

    finally:
        run = re.findall(r'.*r([0-9]{4}).*', filename)[0]
        proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]
        sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]
        filesize = os.path.getsize(filename)
        duration = (datetime.now()-start).total_seconds()
        #influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE, total_sequences, success, duration, reason)
        #client.write_points([influx])
    return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal, hist_pulses,
            low_edges, high_edges, signal_edges, dig_signal_edges, gain_stats, max_cells, when, qm, err)

 done = False
 first_files = []
 inp = []
 left = total_sequences

 # Display Information about the selected pulses indices for correction.
 pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses

 try:
    if len(pulses_lst) > 1:
        print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
               .format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
                       pulses_lst[1] - pulses_lst[0]))
    else:
        print("one pulse is selected: a pulse of idx {}".format(pulses_lst[0]))
 except Exception as e:
    raise ValueError('max_pulses input Error: {}'.format(e))

 bins_gain_vs_signal = (100, 100)
 bins_signal_low_range = 100
 bins_signal_high_range = 100
 bins_dig_gain_vs_signal = (100, 4)

 hists_gain_vs_signal =  np.zeros((bins_gain_vs_signal), np.float64)
 hists_dig_gain_vs_signal =  np.zeros((bins_dig_gain_vs_signal), np.float64)
 gain_stats = 0

 low_edges, high_edges, signal_edges, dig_signal_edges = None, None, None, None
 dbparms = [cal_db_interface, creation_time, max_cells_db, bias_voltage,
           photon_energy, max_cells_db_dark, cal_db_timeout]

 fileparms = calfile

 all_cells = []
 whens = []
 errors = []
 while not done:

    dones = []
    first = True
    for i in range(16):
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = str(mapped_files[qm].get())
            dones.append(mapped_files[qm].empty())
        else:
            print("Skipping {}".format(qm))
            first_files.append((None, None))
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        if first:
            first_files.append((fname_in, fout))
        inp.append((fname_in, fout, i,  qm))
    first = False
    if len(inp) >= min(MAX_PAR, left):
        print("Running {} tasks parallel".format(len(inp)))
        p = partial(correct_module, max_cells, index_v, CHUNK_SIZE, total_sequences,
                    sequences_qm, bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
                    bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, chunk_size_idim,
-                    special_opts, il_mode, loc, dinstance, force_hg_if_below, force_mg_if_below,
-                    mask_noisy_adc, acq_rate, gain_setting, corr_bools)
+                    special_opts, il_mode, karabo_id, dinstance, force_hg_if_below, force_mg_if_below,
+                    mask_noisy_adc, acq_rate, gain_setting, corr_bools, h5path, h5path_idx)

        r = view.map_sync(p, inp)

        #r = list(map(p, inp))

        inp = []
        left -= MAX_PAR

        init_hist = False
        for rr in r:
            if rr is not None:
                hl, hh, hg, hdg, hp, low_edges, high_edges, signal_edges, dig_signal_edges, gs, cells, when, qm, err = rr
                all_cells.append(cells)
                whens.append((qm, when))
                errors.append(err)
                # Validate hp to be int not None.
                if not init_hist and hp is not None:
                    hists_signal_low =  np.zeros((bins_signal_low_range, hp), np.float64)
                    hists_signal_high =  np.zeros((bins_signal_low_range, hp), np.float64)
                    init_hist = True
                if hl is not None:  # any one being None will also make the others None
                    hists_signal_low += hl.astype(np.float64)
                    hists_signal_high += hh.astype(np.float64)
                    hists_gain_vs_signal += hg.astype(np.float64)
                    hists_dig_gain_vs_signal += hdg.astype(np.float64)
                    gain_stats += gs

    done = all(dones)
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Constants were injected on: ")
 to_store = []
 for i, (qm, when) in enumerate(whens):
    print(qm)
    line = [qm]
    # If correction is crashed
    if errors[i] is not None:
        print("Error: {}".format(errors[i]) )
    else:
        for key, item in when.items():
            if hasattr(item, 'strftime'):
                item = item.strftime('%y-%m-%d %H:%M')
            when[key] = item
            print('{:.<12s}'.format(key), item)

    # Store few time stamps if exists
    # Add NA to keep array structure
    for key in ['offset', 'slopesPC', 'slopesFF']:
        if when and key in when:
            line.append(when[key])
        else:
            if errors[i] is not None:
                line.append('Err')
            else:
                line.append('NA')
    to_store.append(line)

 np.savetxt("{}/tmp_const_beginat_S{:05d}.csv".format(out_folder, sequences[0]),
           np.array(to_store).astype(str), fmt='%s', delimiter = ',')
 ```

 %% Cell type:code id: tags:

 ``` python
 from mpl_toolkits.mplot3d import Axes3D
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from matplotlib.ticker import LinearLocator, FormatStrFormatter
 import numpy as np
 %matplotlib inline
 def do_3d_plot(data, edges, x_axis, y_axis):
    fig = plt.figure(figsize=(10,10))
    ax = fig.gca(projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)

    Z = data.T

    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ## Signal vs. Analogue Gain ##

 The following plot shows plots signal vs. gain for the first 128 images.

 %% Cell type:code id: tags:

 ``` python
 if signal_edges is not None:
    do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis):
    from matplotlib.colors import LogNorm
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")

 if signal_edges is not None:
    do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Value (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ## Signal vs. Digitized Gain ##

 The following plot shows plots signal vs. digitized gain for the first 128 images.

 %% Cell type:code id: tags:

 ``` python
 if dig_signal_edges is not None:
    do_2d_plot(hists_dig_gain_vs_signal, dig_signal_edges, "Signal (ADU)", "Gain Bit Value")
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots()
 if isinstance(gain_stats, np.ndarray):
    ax.pie(gain_stats, labels=["high", "medium", "low"], autopct='%1.2f%%',
            shadow=True, startangle=27)
    a = ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
 ```

 %% Cell type:markdown id: tags:

 ## Mean Intensity per Pulse ##

 The following plots show the mean signal for each pulse in a detailed and expanded intensity region.

 %% Cell type:code id: tags:

 ``` python
 if low_edges is not None:
    do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
    do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
 if high_edges is not None:
    do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
    do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
 ```

 %% Cell type:code id: tags:

 ``` python

 corrected = []
 raw = []
 gains = []
 mask = []
 cell_fac = 1
 first_idx = 0
 last_idx = cell_fac*176+first_idx
 pulse_ids = []
 train_ids = []
 for i, ff in enumerate(first_files[:16]):
    try:

        rf, cf = ff
-        #print(cf, i)
        if rf is None:

            raise Exception("File not present")
-        #print(rf)
        infile = h5py.File(rf, "r")
-        #print("/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, i))
-        raw.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, i)][first_idx:last_idx,0,...]))
+        datapath = h5path.format(i)
+        raw.append(np.array(infile[f"{datapath}/image/data"][first_idx:last_idx,0,...]))
        infile.close()

        infile = h5py.File(cf, "r")
-        #print("/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i))
-        corrected.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, i)][first_idx:last_idx,...]))
-        gains.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/gain".format(instrument, i)][first_idx:last_idx,...]))
-        mask.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/mask".format(instrument, i)][first_idx:last_idx,...]))
-        pulse_ids.append(np.squeeze(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/pulseId".format(instrument, i)][first_idx:last_idx,...]))
-        train_ids.append(np.squeeze(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/trainId".format(instrument, i)][first_idx:last_idx,...]))
+        corrected.append(np.array(infile[f"{datapath}/image/data"][first_idx:last_idx,...]))
+        gains.append(np.array(infile[f"{datapath}/image/gain"][first_idx:last_idx,...]))
+        mask.append(np.array(infile[f"{datapath}/image/mask"][first_idx:last_idx,...]))
+        pulse_ids.append(np.squeeze(infile[f"{datapath}/image/pulseId"][first_idx:last_idx,...]))
+        train_ids.append(np.squeeze(infile[f"{datapath}/image/trainId"][first_idx:last_idx,...]))
        infile.close()

    except Exception as e:
        print(e)
        corrected.append(np.zeros((last_idx-first_idx, 512, 128)))
        gains.append(np.zeros((last_idx-first_idx, 512, 128)))
        mask.append(np.zeros((last_idx-first_idx, 512, 128)))
        raw.append(np.zeros((last_idx-first_idx, 512, 128)))

 ```

 %% Cell type:code id: tags:

 ``` python
 domask = False
 if domask:
    for i, c in enumerate(corrected):
        c[mask[i] != 0] = 0
        #c[pats[i] < 200]  = 0
        #c[np.abs(pats[i]) == 1000] = np.abs(c[np.abs(pats[i]) == 1000])
 combined = combine_stack(corrected, last_idx-first_idx)
 combined_raw = combine_stack(raw, last_idx-first_idx)
 combined_g = combine_stack(gains, last_idx-first_idx)
 combined_mask = combine_stack(mask, last_idx-first_idx)
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview ###

 The per pixel mean of the first 128 images of the RAW data

 %% Cell type:code id: tags:

 ``` python
 %matplotlib inline
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(combined_raw[:,:1300,400:1600],axis=0),
               vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), 4000),
               vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 7000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)

 ```

 %% Cell type:markdown id: tags:

 ### Single Shot Preview ###

 A single shot image from cell 12 of the first train

 %% Cell type:code id: tags:

 ``` python

 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 dim = combined[1,:1300,400:1600]

 im = ax.imshow(dim, vmin=-0, vmax=max(20*np.median(dim[dim > 0]), 100), cmap="jet", interpolation="nearest")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 h = ax.hist(dim.flatten(), bins=1000, range=(0, 2000))
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview ###

 The per pixel mean of the first 128 images of the CORRECTED data

 %% Cell type:code id: tags:

 ``` python

 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(combined[:,:1300,400:1600], axis=0), vmin=-50,
               vmax=max(100*np.median(combined[combined > 0]), 100), cmap="jet", interpolation="nearest")
 cb = fig.colorbar(im, ax=ax)

 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 combined[combined <= 0] = 0
 h = ax.hist(combined.flatten(), bins=1000, range=(-50, 1000), log=True)
 ```

-%% Cell type:code id: tags:
-
-``` python
-#np.save('/gpfs/exfel/data/scratch/haufs/agipd_hist/prop_off_pcor_splits.npy', h)
-```
-
 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview ###

 The per pixel maximum of the first 128 images of the digitized GAIN data

 %% Cell type:code id: tags:

 ``` python

 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max(combined_g[1,:1300,400:1600][None,...], axis=0), vmin=0,
               vmax=3, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 from cal_tools.enums import BadPixels
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Shot Bad Pixels ###

 A single shot bad pixel map from cell 4 of the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(combined_mask[10,:1300,400:1600]), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Full Train Bad Pixels ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(np.max(combined_mask[:,:1300,400:1600], axis=0)), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Full Train Bad Pixels - Only Dark Char. Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max((combined_mask.astype(np.uint32)[:,:1300,400:1600] & BadPixels.NOISY_ADC.value) != 0, axis=0), vmin=0,
               vmax=1, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 from cal_tools.enums import BadPixels
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 cm = combined_mask[:,:1300,400:1600]
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 im = ax.imshow(np.log2(np.max(cm, axis=0)), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
-
-%% Cell type:code id: tags:
-
-``` python
-```
-
-%% Cell type:code id: tags:
-
-``` python
-```

 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 1.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
+cluster_profile = "noDB"
 in_folder = "/gpfs/exfel/exp/MID/201931/p900107/raw" # the folder to read data from, required
+out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_Corr"  # the folder to output to, required
+sequences =  [-1] # sequences to correct, set to -1 for all, range allowed
+modules = [-1] # modules to correct, set to -1 for all, range allowed
 run = 11 # runs to process, required
-out_folder =  "/gpfs/exfel/data/scratch/karnem/test/AGIPD_Corr"  # the folder to output to, required
+
+karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
+karabo_da = [-1]  # data aggregators
+receiver_id = "{}CH0" # inset for receiver devices
+path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
+h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
+h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
+h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
+karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
+karabo_da_control = 'DA02' # karabo DA for control infromation
+
+use_dir_creation_date = True # use the creation data of the input dir for database queries
+cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
+cal_db_timeout = 30000 # in milli seconds
+creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
+
 calfile =  "" # path to calibration file. Leave empty if all data should come from DB
-sequences =  [-1] # sequences to correct, set to -1 for all, range allowed
+nodb = False # if set only file-based constants will be used
 mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
+bias_voltage = 300
+acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
+gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
+photon_energy = 9.2 # photon energy in keV
 interlaced = False # whether data is in interlaced layout
 overwrite = True # set to True if existing data should be overwritten
-cluster_profile = "noDB"
 max_pulses = [0, 500, 1] # range list [st, end, step] of maximum pulse indices. 3 allowed maximum list input elements.
 local_input = False
-bias_voltage = 300
-cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
-use_dir_creation_date = True # use the creation data of the input dir for database queries
 sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
-photon_energy = 9.2 # photon energy in keV
 index_v = 2 # version of RAW index type
-nodb = False # if set only file-based constants will be used
 blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
-corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted
 melt_snow = "" # if set to "none" snowy pixels are identified and resolved to NaN, if set to "interpolate", the value is interpolated from neighbouring pixels
-cal_db_timeout = 30000 # in milli seconds
 max_cells_db_dark = 0  # set to a value different than 0 to use this value for dark data DB queries
 max_cells_db = 0 # set to a value different than 0 to use this value for DB queries
 chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.
-creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
-instrument = "MID"  # the instrument the detector is installed at, required
 force_hg_if_below = 1000 # set to a value other than 0 to force a pixel into high gain if it's high gain offset subtracted value is below this threshold
 force_mg_if_below = 1000 # set to a value other than 0 to force a pixel into medium gain if it's medium gain offset subtracted value is below this threshold
 mask_noisy_adc = 0.25 # set to a value other than 0 and below 1 to mask entire ADC if fraction of noisy pixels is above
-acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
-gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
-
-h5path_ctrl = '/CONTROL/SPB_IRU_AGIPD1M1/MDL/FPGA_COMP_TEST' # path to control information
-karabo_da_control = 'DA02' # karabo DA for control infromation

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 rel_gain = False # do relative gain correction based on PC data
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 dont_zero_nans = False # do not zero NaN values in corrected data
 dont_zero_orange = False # do not zero very negative and very large values
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
+corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted

-def balance_sequences(in_folder, run, sequences, sequences_per_node):
+def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
-    return bs(in_folder, run, sequences, sequences_per_node, "AGIPD*")
+    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Fill dictionaries comprising bools and arguments for correction and data analysis

 # Here the herarichy and dependability for correction booleans are defined
 corr_bools = {}

 # offset is at the bottom of AGIPD correction pyramid.
 corr_bools["only_offset"] = only_offset

 # Dont apply any corrections if only_offset is requested
 if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["dont_zero_nans"] = dont_zero_nans
    corr_bools["dont_zero_orange"] = dont_zero_orange

 # Here the herarichy and dependability for data analysis booleans and arguments are defined
 data_analysis_parms = {}
 ```

 %% Cell type:code id: tags:

 ``` python
 import sys
 from collections import OrderedDict

 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 import os
 import h5py
 import numpy as np
 import matplotlib
 matplotlib.use("agg")
 import matplotlib.pyplot as plt
 from ipyparallel import Client
-print("Connecting to profile {}".format(cluster_profile))
+print(f"Connecting to profile {cluster_profile}")
 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
-from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name,
+from cal_tools.tools import (map_modules_from_folder, parse_runs, run_prop_seq_from_path, get_notebook_name,
                                 get_dir_creation_date, get_constant_from_db)
 from cal_tools.agipdlib import get_gain_setting
 from dateutil import parser
 from datetime import timedelta

 il_mode = interlaced
 max_cells = mem_cells
 gains = np.arange(3)
 cells = np.arange(max_cells)

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    offset = parser.parse(creation_date_offset)
    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
    creation_time += delta
-    print("Using {} as creation time".format(creation_time))
-
-in_folder = "{}/r{:04d}".format(in_folder, run)
+    print(f"Using {creation_time} as creation time")

-print("Working in IL Mode: {}. Actual cells in use are: {}".format(il_mode, max_cells))
+print(f"Working in IL Mode: {il_mode}. Actual cells in use are: {max_cells}")

 if sequences[0] == -1:
    sequences = None

-QUADRANTS = 4
-MODULES_PER_QUAD = 4
-DET_FILE_INSET = "AGIPD"
 CHUNK_SIZE = 250
 MAX_PAR = 32

 if in_folder[-1] == "/":
    in_folder = in_folder[:-1]
 print("Outputting to {}".format(out_folder))

 if not os.path.exists(out_folder):
    os.makedirs(out_folder)
 elif not overwrite:
    raise AttributeError("Output path exists! Exiting")

 import warnings
 warnings.filterwarnings('ignore')

 from cal_tools.agipdlib import SnowResolution
 melt_snow = False if melt_snow == "" else SnowResolution(melt_snow)

 special_opts = blc_noise_threshold, blc_hmatch, melt_snow

-loc = None
+instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
-    loc = "SPB_DET_AGIPD1M-1"
    dinstance = "AGIPD1M1"
 else:
-    loc = "MID_DET_AGIPD1M-1"
    dinstance = "AGIPD1M2"
-print("Detector in use is {}".format(loc))
+```
+
+%% Cell type:code id: tags:
+
+``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 control_fname = '{}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, run, karabo_da_control)

 if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'Error while reading gain setting from: \n{control_fname}')
            print(e)
            print("Set gain settion to 0")
            gain_setting = 0

-print("Gain setting: {}".format(gain_setting))
+print(f"Gain setting: {gain_setting}")
+print(f"Detector in use is {karabo_id}")
+print(f"Instrument {instrument}")
+print(f"Detector instance {dinstance}")
+
+if karabo_da[0] == -1:
+    if modules[0] == -1:
+        modules = list(range(16))
+    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
+else:
+    modules = [int(x[-2:]) for x in karabo_da]
+print("Process modules: ",
+      ', '.join([f"Q{x // 4 + 1}M{x % 4 + 1}" for x in modules]))
+
+h5path = h5path.format(karabo_id, receiver_id)
+h5path_idx = h5path_idx.format(karabo_id, receiver_id)
+h5path_ctrl = h5path_ctrl.format(karabo_id_control)
 ```

 %% Cell type:code id: tags:

 ``` python
 def combine_stack(d, sdim):
    combined = np.zeros((sdim, 2048, 2048))
    combined[...] = np.nan

    dy = 0
    for i in range(16):

        try:
            if i < 8:
                dx = -512
                if i > 3:
                    dx -= 25
                mx = 1
                my = i % 8
                combined[:, my*128+dy:(my+1)*128+dy,
                         mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,:,::-1]
                dy += 30
                if i == 3:
                    dy += 30

            elif i < 12:
                dx = 80 - 50
                if i == 8:
                    dy = 4*30 + 30 +50 -30

                mx = 1
                my = i % 8 +4
                combined[:, my*128+dy:(my+1)*128+dy,
                         mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
                dy += 30
            else:
                dx = 100 - 50
                if i == 11:
                    dy = 20

                mx = 1
                my = i - 14

                combined[:, my*128+dy:(my+1)*128+dy,
                         mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
                dy += 30
        except:
            continue

    return combined
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
-from queue import Queue
-from collections import OrderedDict
-
-def map_modules_from_files(filelist):
-    module_files = OrderedDict()
-    mod_ids = OrderedDict()
-    total_sequences = 0
-    sequences_qm = {}
-    one_module = None
-    for quadrant in range(0, QUADRANTS):
-        for module in range(0, MODULES_PER_QUAD):
-            name = "Q{}M{}".format(quadrant + 1, module + 1)
-            module_files[name] = Queue()
-            num = quadrant * 4 + module
-            mod_ids[name] = num
-            file_infix = "{}{:02d}".format(DET_FILE_INSET, num)
-            sequences_qm[name] = 0
-            for file in filelist:
-                if file_infix in file:
-                    if not one_module:
-                        one_module = file, num
-                    module_files[name].put(file)
-                    total_sequences += 1
-                    sequences_qm[name] += 1
-
-    return module_files, mod_ids, total_sequences, sequences_qm, one_module
-
-dirlist = sorted(os.listdir(in_folder))
-file_list = []
-
-
-for entry in dirlist:
-    #only h5 file
-    abs_entry = "{}/{}".format(in_folder, entry)
-    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":
-
-        if sequences is None:
-            file_list.append(abs_entry)
-        else:
-            for seq in sequences:
-                if "{:05d}.h5".format(seq) in abs_entry:
-                    file_list.append(os.path.abspath(abs_entry))
-
-mapped_files, mod_ids, total_sequences, sequences_qm, one_module = map_modules_from_files(file_list)
+mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
+mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
 MAX_PAR = min(MAX_PAR, total_sequences)
 ```

 %% Cell type:markdown id: tags:

 ## Processed Files ##

 %% Cell type:code id: tags:

 ``` python
 import copy
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
-print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
+print(f"Processing a total of {total_sequences} sequence files in chunks of {MAX_PAR}")
 table = []
 mfc = copy.copy(mapped_files)
 ti = 0
 for k, files in mfc.items():
    i = 0
    while not files.empty():
        f = files.get()
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
 # restore the queue
-mapped_files, mod_ids, total_sequences, sequences_qm, one_module = map_modules_from_files(file_list)
+mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
+mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
 ```

 %% Cell type:code id: tags:

 ``` python
 import copy
 from functools import partial
 def correct_module(max_cells, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
                   bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
                   bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, chunk_size_idim,
                   special_opts, il_mode, loc, dinstance, force_hg_if_below, force_mg_if_below,
-                   mask_noisy_adc, acq_rate, gain_setting, corr_bools, inp):
+                   mask_noisy_adc, acq_rate, gain_setting, corr_bools, h5path, h5path_idx, inp):
    print("foo")
    import numpy as np
    import copy
    import h5py
    import socket
    from datetime import datetime
    import re
    import os
    from influxdb import InfluxDBClient
    import subprocess
    from cal_tools.enums import BadPixels
    from cal_tools.agipdlib import AgipdCorrections, SnowResolution
    from cal_tools.agipdlib import get_num_cells, get_acq_rate


    #client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')

    def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,
                            total_sequences, success, runtime, reason=""):
        return {
            "measurement": "run_correction",
            "tags": {
                "host": socket.gethostname(),
                "run": run,
                "proposal": proposal,
                "mem_cells": max_cells,
                "sequence": sequence,
                "module": qm,
                "filesize": filesize,
                "chunksize": chunksize,
                "total_sequences": total_sequences,
                "sequences_module": sequences_qm[qm],
                "detector": "AGIPD",
                "instrument": "SPB",

            },
            "time": datetime.utcnow().isoformat(),
            "fields": {
                "success": success,
                "reason": reason,
                "runtime": runtime,
            }
        }

    hists_signal_low = None
    hists_signal_high = None
    hists_gain_vs_signal = None
    hists_dig_gain_vs_signal = None
    hist_pulses = None
    low_edges = None
    high_edges = None
    signal_edges = None
    dig_signal_edges = None
    gain_stats = 0
    when = None
    err = None


    try:
        start = datetime.now()
        success = True
        reason = ""
        filename, filename_out, channel, qm = inp
        print("Have input")

        if max_cells == 0:
            max_cells = get_num_cells(filename, loc, channel)
            if max_cells is None:
                raise ValueError("No raw images found for {}".format(qm))
            else:
                cells = np.arange(max_cells)

        if acq_rate == 0.:
            acq_rate = get_acq_rate(filename, loc, channel)
        else:
            acq_rate = None

        if dbparms[2] == 0:
            dbparms[2] = max_cells
        if dbparms[5] == 0:
            dbparms[5] = dbparms[2]

        print("Set memory cells to {}".format(max_cells))
        print("Set acquistion rate cells to {} MHz".format(acq_rate))

+        # AGIPD correction requires path without the leading "/"
+        if h5path[0] == '/':
+            h5path = h5path[1:]
+        if h5path_idx[0] == '/':
+            h5path_idx = h5path_idx[1:]
+

        infile = h5py.File(filename, "r", driver="core")
        outfile = h5py.File(filename_out, "w")
        try:
            agipd_corr = AgipdCorrections(infile, outfile, max_cells, channel, max_pulses,
                                          bins_gain_vs_signal, bins_signal_low_range,
                                          bins_signal_high_range, bins_dig_gain_vs_signal,
                                          chunk_size_idim=chunk_size_idim,
                                          il_mode=il_mode, raw_fmt_version=index_v,
-                                          h5_data_path="INSTRUMENT/{}/DET/{{}}CH0:xtdf/".format(loc),
-                                          h5_index_path="INDEX/{}/DET/{{}}CH0:xtdf/".format(loc),
+                                          h5_data_path=h5path,
+                                          h5_index_path=h5path_idx,
                                          cal_det_instance=dinstance, force_hg_if_below=force_hg_if_below,
                                          force_mg_if_below=force_mg_if_below, mask_noisy_adc=mask_noisy_adc,
                                          acquisition_rate=acq_rate, gain_setting=gain_setting,
                                          corr_bools=corr_bools)

            blc_noise_threshold, blc_hmatch, melt_snow = special_opts
            if not corr_bools["only_offset"]:
                blc_hmatch = False
                melt_snow = False
            agipd_corr.baseline_corr_noise_threshold = blc_noise_threshold
            agipd_corr.melt_snow = melt_snow
            try:
                agipd_corr.get_valid_image_idx()
            except IOError:
                return
            if not nodb:
                when = agipd_corr.initialize_from_db(dbparms, qm, only_dark=(fileparms != ""))
            if fileparms != "" and not corr_bools["only_offset"]:
                agipd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
            print("Initialized constants")

            for irange in agipd_corr.get_iteration_range():
                agipd_corr.correct_agipd(irange)
                print("Iterated")

            print("All iterations are finished")
            hists, edges = agipd_corr.get_histograms()
            hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal, hist_pulses = hists
            low_edges, high_edges, signal_edges, dig_signal_edges = edges
            gain_stats = np.array(agipd_corr.gain_stats)

        finally:
            outfile.close()
            infile.close()
            print("Closed files")

    except Exception as e:
        print(e)
        err = e
        success = False
        reason = "Error"

    finally:
        run = re.findall(r'.*r([0-9]{4}).*', filename)[0]
        proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]
        sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]
        filesize = os.path.getsize(filename)
        duration = (datetime.now()-start).total_seconds()
        #influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE, total_sequences, success, duration, reason)
        #client.write_points([influx])
    return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal, hist_pulses,
            low_edges, high_edges, signal_edges, dig_signal_edges, gain_stats, max_cells, when, qm, err)

 done = False
 first_files = []
 inp = []
 left = total_sequences

 # Display Information about the selected pulses indices for correction.
 pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses

 try:
    if len(pulses_lst) > 1:
        print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
               .format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
                       pulses_lst[1] - pulses_lst[0]))
    else:
        print("one pulse is selected: a pulse of idx {}".format(pulses_lst[0]))
 except Exception as e:
    raise ValueError('max_pulses input Error: {}'.format(e))

 bins_gain_vs_signal = (100, 100)
 bins_signal_low_range = 100
 bins_signal_high_range = 100
 bins_dig_gain_vs_signal = (100, 4)

 hists_gain_vs_signal =  np.zeros((bins_gain_vs_signal), np.float64)
 hists_dig_gain_vs_signal =  np.zeros((bins_dig_gain_vs_signal), np.float64)
 gain_stats = 0

 low_edges, high_edges, signal_edges, dig_signal_edges = None, None, None, None
 dbparms = [cal_db_interface, creation_time, max_cells_db, bias_voltage,
           photon_energy, max_cells_db_dark, cal_db_timeout]

 fileparms = calfile

 all_cells = []
 whens = []
 errors = []
 while not done:

    dones = []
    first = True
    for i in range(16):
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = str(mapped_files[qm].get())
            dones.append(mapped_files[qm].empty())
        else:
            print("Skipping {}".format(qm))
            first_files.append((None, None))
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        if first:
            first_files.append((fname_in, fout))
        inp.append((fname_in, fout, i,  qm))
    first = False
    if len(inp) >= min(MAX_PAR, left):
        print("Running {} tasks parallel".format(len(inp)))
        p = partial(correct_module, max_cells, index_v, CHUNK_SIZE, total_sequences,
                    sequences_qm, bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
                    bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, chunk_size_idim,
-                    special_opts, il_mode, loc, dinstance, force_hg_if_below, force_mg_if_below,
-                    mask_noisy_adc, acq_rate, gain_setting, corr_bools)
+                    special_opts, il_mode, karabo_id, dinstance, force_hg_if_below, force_mg_if_below,
+                    mask_noisy_adc, acq_rate, gain_setting, corr_bools, h5path, h5path_idx)

        r = view.map_sync(p, inp)

        #r = list(map(p, inp))

        inp = []
        left -= MAX_PAR

        init_hist = False
        for rr in r:
            if rr is not None:
                hl, hh, hg, hdg, hp, low_edges, high_edges, signal_edges, dig_signal_edges, gs, cells, when, qm, err = rr
                all_cells.append(cells)
                whens.append((qm, when))
                errors.append(err)
                # Validate hp to be int not None.
                if not init_hist and hp is not None:
                    hists_signal_low =  np.zeros((bins_signal_low_range, hp), np.float64)
                    hists_signal_high =  np.zeros((bins_signal_low_range, hp), np.float64)
                    init_hist = True
                if hl is not None:  # any one being None will also make the others None
                    hists_signal_low += hl.astype(np.float64)
                    hists_signal_high += hh.astype(np.float64)
                    hists_gain_vs_signal += hg.astype(np.float64)
                    hists_dig_gain_vs_signal += hdg.astype(np.float64)
                    gain_stats += gs

    done = all(dones)
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Constants were injected on: ")
 to_store = []
 for i, (qm, when) in enumerate(whens):
    print(qm)
    line = [qm]
    # If correction is crashed
    if errors[i] is not None:
        print("Error: {}".format(errors[i]) )
    else:
        for key, item in when.items():
            if hasattr(item, 'strftime'):
                item = item.strftime('%y-%m-%d %H:%M')
            when[key] = item
            print('{:.<12s}'.format(key), item)

    # Store few time stamps if exists
    # Add NA to keep array structure
    for key in ['offset', 'slopesPC', 'slopesFF']:
        if when and key in when:
            line.append(when[key])
        else:
            if errors[i] is not None:
                line.append('Err')
            else:
                line.append('NA')
    to_store.append(line)

 np.savetxt("{}/tmp_const_beginat_S{:05d}.csv".format(out_folder, sequences[0]),
           np.array(to_store).astype(str), fmt='%s', delimiter = ',')
 ```

 %% Cell type:code id: tags:

 ``` python
 from mpl_toolkits.mplot3d import Axes3D
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from matplotlib.ticker import LinearLocator, FormatStrFormatter
 import numpy as np
 %matplotlib inline
 def do_3d_plot(data, edges, x_axis, y_axis):
    fig = plt.figure(figsize=(10,10))
    ax = fig.gca(projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)

    Z = data.T

    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ## Signal vs. Analogue Gain ##

 The following plot shows plots signal vs. gain for the first 128 images.

 %% Cell type:code id: tags:

 ``` python
 if signal_edges is not None:
    do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis):
    from matplotlib.colors import LogNorm
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")

 if signal_edges is not None:
    do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Value (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ## Signal vs. Digitized Gain ##

 The following plot shows plots signal vs. digitized gain for the first 128 images.

 %% Cell type:code id: tags:

 ``` python
 if dig_signal_edges is not None:
    do_2d_plot(hists_dig_gain_vs_signal, dig_signal_edges, "Signal (ADU)", "Gain Bit Value")
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots()
 if isinstance(gain_stats, np.ndarray):
    ax.pie(gain_stats, labels=["high", "medium", "low"], autopct='%1.2f%%',
            shadow=True, startangle=27)
    a = ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
 ```

 %% Cell type:markdown id: tags:

 ## Mean Intensity per Pulse ##

 The following plots show the mean signal for each pulse in a detailed and expanded intensity region.

 %% Cell type:code id: tags:

 ``` python
 if low_edges is not None:
    do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
    do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
 if high_edges is not None:
    do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
    do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
 ```

 %% Cell type:code id: tags:

 ``` python

 corrected = []
 raw = []
 gains = []
 mask = []
 cell_fac = 1
 first_idx = 0
 last_idx = cell_fac*176+first_idx
 pulse_ids = []
 train_ids = []
 for i, ff in enumerate(first_files[:16]):
    try:

        rf, cf = ff
-        #print(cf, i)
        if rf is None:

            raise Exception("File not present")
-        #print(rf)
        infile = h5py.File(rf, "r")
-        #print("/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, i))
-        raw.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, i)][first_idx:last_idx,0,...]))
+        datapath = h5path.format(i)
+        raw.append(np.array(infile[f"{datapath}/image/data"][first_idx:last_idx,0,...]))
        infile.close()

        infile = h5py.File(cf, "r")
-        #print("/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i))
-        corrected.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(instrument, i)][first_idx:last_idx,...]))
-        gains.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/gain".format(instrument, i)][first_idx:last_idx,...]))
-        mask.append(np.array(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/mask".format(instrument, i)][first_idx:last_idx,...]))
-        pulse_ids.append(np.squeeze(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/pulseId".format(instrument, i)][first_idx:last_idx,...]))
-        train_ids.append(np.squeeze(infile["/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/trainId".format(instrument, i)][first_idx:last_idx,...]))
+        corrected.append(np.array(infile[f"{datapath}/image/data"][first_idx:last_idx,...]))
+        gains.append(np.array(infile[f"{datapath}/image/gain"][first_idx:last_idx,...]))
+        mask.append(np.array(infile[f"{datapath}/image/mask"][first_idx:last_idx,...]))
+        pulse_ids.append(np.squeeze(infile[f"{datapath}/image/pulseId"][first_idx:last_idx,...]))
+        train_ids.append(np.squeeze(infile[f"{datapath}/image/trainId"][first_idx:last_idx,...]))
        infile.close()

    except Exception as e:
        print(e)
        corrected.append(np.zeros((last_idx-first_idx, 512, 128)))
        gains.append(np.zeros((last_idx-first_idx, 512, 128)))
        mask.append(np.zeros((last_idx-first_idx, 512, 128)))
        raw.append(np.zeros((last_idx-first_idx, 512, 128)))

 ```

 %% Cell type:code id: tags:

 ``` python
 domask = False
 if domask:
    for i, c in enumerate(corrected):
        c[mask[i] != 0] = 0
        #c[pats[i] < 200]  = 0
        #c[np.abs(pats[i]) == 1000] = np.abs(c[np.abs(pats[i]) == 1000])
 combined = combine_stack(corrected, last_idx-first_idx)
 combined_raw = combine_stack(raw, last_idx-first_idx)
 combined_g = combine_stack(gains, last_idx-first_idx)
 combined_mask = combine_stack(mask, last_idx-first_idx)
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview ###

 The per pixel mean of the first 128 images of the RAW data

 %% Cell type:code id: tags:

 ``` python
 %matplotlib inline
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(combined_raw[:,:1300,400:1600],axis=0),
               vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), 4000),
               vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 7000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)

 ```

 %% Cell type:markdown id: tags:

 ### Single Shot Preview ###

 A single shot image from cell 12 of the first train

 %% Cell type:code id: tags:

 ``` python

 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 dim = combined[1,:1300,400:1600]

 im = ax.imshow(dim, vmin=-0, vmax=max(20*np.median(dim[dim > 0]), 100), cmap="jet", interpolation="nearest")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 h = ax.hist(dim.flatten(), bins=1000, range=(0, 2000))
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview ###

 The per pixel mean of the first 128 images of the CORRECTED data

 %% Cell type:code id: tags:

 ``` python

 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(combined[:,:1300,400:1600], axis=0), vmin=-50,
               vmax=max(100*np.median(combined[combined > 0]), 100), cmap="jet", interpolation="nearest")
 cb = fig.colorbar(im, ax=ax)

 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 combined[combined <= 0] = 0
 h = ax.hist(combined.flatten(), bins=1000, range=(-50, 1000), log=True)
 ```

-%% Cell type:code id: tags:
-
-``` python
-#np.save('/gpfs/exfel/data/scratch/haufs/agipd_hist/prop_off_pcor_splits.npy', h)
-```
-
 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview ###

 The per pixel maximum of the first 128 images of the digitized GAIN data

 %% Cell type:code id: tags:

 ``` python

 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max(combined_g[1,:1300,400:1600][None,...], axis=0), vmin=0,
               vmax=3, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 from cal_tools.enums import BadPixels
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Shot Bad Pixels ###

 A single shot bad pixel map from cell 4 of the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(combined_mask[10,:1300,400:1600]), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Full Train Bad Pixels ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(np.max(combined_mask[:,:1300,400:1600], axis=0)), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Full Train Bad Pixels - Only Dark Char. Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max((combined_mask.astype(np.uint32)[:,:1300,400:1600] & BadPixels.NOISY_ADC.value) != 0, axis=0), vmin=0,
               vmax=1, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 from cal_tools.enums import BadPixels
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 cm = combined_mask[:,:1300,400:1600]
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 im = ax.imshow(np.log2(np.max(cm, axis=0)), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
-
-%% Cell type:code id: tags:
-
-``` python
-```
-
-%% Cell type:code id: tags:
-
-``` python
-```

--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Characterize Dark Images #

 Author: S. Hauf, Version: 0.1

 The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets , noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.

 The evaluated calibration constants are stored locally and injected in the calibration data base.

 %% Cell type:code id: tags:

 ``` python
 cluster_profile = "noDB" # The ipcluster profile to use
 in_folder = "/gpfs/exfel/d/raw/SPB/202030/p900138/" # path to input data, required
 out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD3" # path to output to, required
 sequences = [0] # sequence files to evaluate.
+modules = [-1]  # list of modules to evaluate, RANGE ALLOWED
+run_high = 264 # run number in which high gain data was recorded, required
+run_med = 265 # run number in which medium gain data was recorded, required
+run_low = 266 # run number in which low gain data was recorded, required
+
+karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
+karabo_da = [-1]  # data aggregators
+receiver_id = "{}CH0" # inset for receiver devices
+path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
+h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
+karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
+karabo_da_control = 'DA02' # karabo DA for control infromation

-run_high = 199 # run number in which high gain data was recorded, required
-run_med = 200 # run number in which medium gain data was recorded, required
-run_low = 201 # run number in which low gain data was recorded, required
-
-mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
+use_dir_creation_date = True  # use dir creation date as data production reference date
+cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
+cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
+
+mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 300 # detector bias voltage
-cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
-cal_db_timeout = 3000000 # timeout on caldb requests"
+gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
+acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017
 rawversion = 2 # RAW file format version
-dont_use_dir_date = False # don't use the dir creation date for determining the creation time

 thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_noise_sigma = 5. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for offset deduced bad pixels

-instrument = "SPB"
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
-modules = [0]#,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]  # module to run for
-acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
-gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
-
-h5path_ctrl = '/CONTROL/SPB_IRU_AGIPD1M1/MDL/FPGA_COMP_TEST' # path to control information
-karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation
 ```

 %% Cell type:code id: tags:

 ``` python
 # imports and things that do not usually need to be changed
 from datetime import datetime
 import dateutil.parser
 import warnings
 warnings.filterwarnings('ignore')
 from collections import OrderedDict
 import os
 import h5py
 import numpy as np
 import matplotlib
 import tabulate

 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 from IPython.display import display, Markdown, Latex
 %matplotlib inline

-from cal_tools.tools import (gain_map_files, parse_runs,
+from cal_tools.tools import (map_gain_stages, parse_runs,
                             run_prop_seq_from_path, get_notebook_name,
                             get_dir_creation_date, save_const_to_h5,
                             get_random_db_interface, get_from_db)
 from cal_tools.influx import InfluxLogger
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview
 from cal_tools.agipdlib import get_gain_setting

 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 from ipyparallel import Client

 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 gains = np.arange(3)

-# no need to change this
-
-QUADRANTS = 4
-MODULES_PER_QUAD = 4
-DET_FILE_INSET = "AGIPD"
-
 IL_MODE = interlaced
 max_cells = mem_cells

 offset_runs = OrderedDict()
-offset_runs["high"] = parse_runs(run_high)[0]
-offset_runs["med"] = parse_runs(run_med)[0]
-offset_runs["low"] = parse_runs(run_low)[0]
+offset_runs["high"] = run_high
+offset_runs["med"] = run_med
+offset_runs["low"] = run_low

 creation_time=None
-if not dont_use_dir_date:
+if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
-
+print(f"Using {creation_time} as creation time of constant.")

 run, prop, seq = run_prop_seq_from_path(in_folder)
-logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
-                      notebook=get_notebook_name(), proposal=prop)
-
-print("Using {} as creation time of constant.".format(creation_time))

 cal_db_interface = get_random_db_interface(cal_db_interface)
-print('Calibration database interface: {}'.format(cal_db_interface))
+print(f'Calibration database interface: {cal_db_interface}')

 loc = None
+instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
-    loc = "SPB_DET_AGIPD1M-1"
    dinstance = "AGIPD1M1"
 else:
-    loc = "MID_DET_AGIPD1M-1"
    dinstance = "AGIPD1M2"
-print("Detector in use is {}".format(loc))
+print(f"Detector in use is {karabo_id}")
+print(f"Instrument {instrument}")
+print(f"Detector instance {dinstance}")
+
+logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
+                      notebook=get_notebook_name(), proposal=prop)
 ```

 %% Cell type:code id: tags:

 ``` python
 control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, run_high,
                                                             run_high, karabo_da_control)

 if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'Error while reading gain setting from: \n{control_fname}')
            print(e)
            print("Gain setting is not found in the control information")
            print("Data will not be processed")
            sequences = []
 ```

 %% Cell type:code id: tags:

 ``` python
+if karabo_da[0] == -1:
+    if modules[0] == -1:
+        modules = list(range(16))
+    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
+else:
+    modules = [int(x[-2:]) for x in karabo_da]
+
+h5path = h5path.format(karabo_id, receiver_id)
+h5path_idx = h5path_idx.format(karabo_id, receiver_id)
+h5path_ctrl = h5path_ctrl.format(karabo_id_control)
+
 print("Parameters are:")
-print("Proposal: {}".format(prop))
-print("Memory cells: {}/{}".format(mem_cells, max_cells))
+print(f"Proposal: {prop}")
+print(f"Memory cells: {mem_cells}/{max_cells}")
 print("Runs: {}".format([ v for v in offset_runs.values()]))
-print("Sequences: {}".format(sequences))
-print("Interlaced mode: {}".format(IL_MODE))
-print("Using DB: {}".format(db_output))
-print("Input: {}".format(in_folder))
-print("Output: {}".format(out_folder))
-print("Bias voltage: {}V".format(bias_voltage))
-print("Gain setting: {}".format(gain_setting))
+print(f"Sequences: {sequences}")
+print(f"Interlaced mode: {IL_MODE}")
+print(f"Using DB: {db_output}")
+print(f"Input: {in_folder}")
+print(f"Output: {out_folder}")
+print(f"Bias voltage: {bias_voltage}V")
+print(f"Gain setting: {gain_setting}")
 ```

 %% Cell type:markdown id: tags:

 The following lines will create a queue of files which will the be executed module-parallel. Distiguishing between different gains.

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
-if not os.path.exists(out_folder):
-    os.makedirs(out_folder)
-
-gmf = gain_map_files(in_folder, offset_runs, sequences, DET_FILE_INSET, QUADRANTS, MODULES_PER_QUAD)
+os.makedirs(out_folder, exist_ok=True)
+gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
 gain_mapped_files, total_sequences, total_file_size = gmf
-
-print("Will process at total of {} sequences: {:0.2f} GB of data.".format(total_sequences, total_file_size))
+print(f"Will process a total of {total_sequences} sequences.")
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 import copy
 from functools import partial
-def characterize_module(il_mode, cells, bp_thresh, rawversion, loc, acq_rate, inp):
+def characterize_module(il_mode, cells, bp_thresh, rawversion, loc, acq_rate,
+                        h5path, h5path_idx, inp):
    import numpy as np
    import copy
    import h5py
    from cal_tools.enums import BadPixels
    from cal_tools.agipdlib import get_num_cells, get_acq_rate

    filename, filename_out, channel = inp

-
    if cells == 0:
        cells = get_num_cells(filename, loc, channel)

-    print("Using {} memory cells".format(cells))
+    print(f"Using {cells} memory cells")

    if acq_rate == 0.:
        acq_rate = get_acq_rate(filename, loc, channel)

    thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh

    infile = h5py.File(filename, "r", driver="core")
+
+    h5path = h5path.format(channel)
+    h5path_idx = h5path_idx.format(channel)
+
    if rawversion == 2:
-        count = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        count = np.squeeze(infile[f"{h5path_idx}/count"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(first[count != 0][-1]+count[count != 0][-1])
        first_index = int(first[count != 0][0])
    else:
-        status = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(loc, channel)])
+        status = np.squeeze(infile[f"{h5path_idx}/status"])
        if np.count_nonzero(status != 0) == 0:
            return
-        last = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/last".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        last = np.squeeze(infile[f"{h5path_idx}/last"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(last[status != 0][-1]) + 1
        first_index = int(first[status != 0][0])
-    im = np.array(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data".format(loc, channel)][first_index:last_index,...])
-    cellIds = np.squeeze(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, channel)][first_index:last_index,...])
+    im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
+    cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])

    infile.close()

    if il_mode:
        ga = im[1::2, 0, ...]
        im = im[0::2, 0, ...].astype(np.float32)
        cellIds = cellIds[::2]
    else:
        ga = im[:, 1, ...]
        im = im[:, 0, ...].astype(np.float32)

    im = np.rollaxis(im, 2)
    im = np.rollaxis(im, 2, 1)

    ga = np.rollaxis(ga, 2)
    ga = np.rollaxis(ga, 2, 1)

    mcells = cells #max(cells, np.max(cellIds)+1)
    offset = np.zeros((im.shape[0], im.shape[1], mcells))
    gains = np.zeros((im.shape[0], im.shape[1], mcells))
    noise = np.zeros((im.shape[0], im.shape[1], mcells))

    for cc in np.unique(cellIds[cellIds < mcells]):
        cellidx = cellIds == cc
        offset[...,cc] = np.median(im[..., cellidx], axis=2)
        noise[...,cc] = np.std(im[..., cellidx], axis=2)
        gains[...,cc] = np.median(ga[..., cellidx], axis=2)

    # bad pixels
    bp = np.zeros(offset.shape, np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[(offset < thresholds_offset_hard[0]) | (
        offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))

    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value


    return offset, noise, gains, bp, cells, acq_rate


 offset_g = OrderedDict()
 noise_g = OrderedDict()
 gain_g = OrderedDict()
 badpix_g = OrderedDict()
 gg = 0

 start = datetime.now()
 all_cells = []
 all_acq_rate = []
 for gain, mapped_files in gain_mapped_files.items():

    inp = []
    dones = []
    for i in modules:
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = mapped_files[qm].get()
            dones.append(mapped_files[qm].empty())
        else:
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        inp.append((fname_in, fout, i))
    first = False
    p = partial(characterize_module, IL_MODE, max_cells,
               (thresholds_offset_hard, thresholds_offset_sigma,
-                thresholds_noise_hard, thresholds_noise_sigma), rawversion, loc, acq_rate)
-    results = list(map(p, inp))
-    #results = view.map_sync(p, inp)
+                thresholds_noise_hard, thresholds_noise_sigma),
+                rawversion, karabo_id, acq_rate, h5path, h5path_idx)
+    #results = list(map(p, inp))
+    results = view.map_sync(p, inp)
    for ii, r in enumerate(results):
        i = modules[ii]
        offset, noise, gain, bp, thiscell, thisacq = r
        all_cells.append(thiscell)
        all_acq_rate.append(thisacq)
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm not in offset_g:
            offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
            noise_g[qm] = np.zeros_like(offset_g[qm])
            gain_g[qm] = np.zeros_like(offset_g[qm])
            badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)

        offset_g[qm][...,gg] = offset
        noise_g[qm][...,gg] = noise
        gain_g[qm][...,gg] = gain
        badpix_g[qm][...,gg] = bp
    gg +=1

 duration = (datetime.now()-start).total_seconds()
 logger.runtime_summary_entry(success=True, runtime=duration,
                                   total_sequences=total_sequences,
                                   filesize=total_file_size)
 logger.send()
 max_cells = np.max(all_cells)
-print("Using {} memory cells".format(max_cells))
+print(f"Using {max_cells} memory cells")
 acq_rate = np.max(all_acq_rate)
-print("Using {} MHz acquisition rate".format(acq_rate))
+print(f"Using {acq_rate} MHz acquisition rate")
 ```

 %% Cell type:markdown id: tags:

 The thresholds for gain switching are then defined as the mean value between in individual gain bit levels. Note that these thresholds need to be refined with charge induced thresholds, as the two are not the same.

 %% Cell type:code id: tags:

 ``` python
 thresholds_g = {}
 for qm in gain_g.keys():
    thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
    thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
    thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
    for i in range(3):
        thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in modules:
    qm = "Q{}M{}".format(i//4+1, i%4+1)
    res[qm] = {'Offset': offset_g[qm],
               'Noise': noise_g[qm],
               'ThresholdsDark': thresholds_g[qm],
               'BadPixelsDark': badpix_g[qm]
               }

 if local_output:
    for qm in offset_g.keys():
-        ofile = "{}/agipd_offset_store_{}_{}.h5".format(out_folder, "_".join(offset_runs.values()), qm)
+        ofile = "{}/agipd_offset_store_{}_{}.h5".format(out_folder,
+                                                        "{}-{}-{}".format(*offset_runs.values()),
+                                                        qm)
        store_file = h5py.File(ofile, "w")
        store_file["{}/Offset/0/data".format(qm)] = offset_g[qm]
        store_file["{}/Noise/0/data".format(qm)] = noise_g[qm]
        store_file["{}/Threshold/0/data".format(qm)] = thresholds_g[qm]
        store_file["{}/BadPixels/0/data".format(qm)] = badpix_g[qm]
        store_file.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Retrieve existing constants for comparison
 clist = ["Offset", "Noise", "ThresholdsDark", "BadPixelsDark"]
 old_const = {}
 old_mdata = {}
 detinst = getattr(Detectors, dinstance)

 print('Retrieve pre-existing constants for comparison.')

 for qm in res:
    for const in res[qm]:
        metadata = ConstantMetaData()
        dconst = getattr(Constants.AGIPD, const)()
        dconst.data = res[qm][const]
        metadata.calibration_constant = dconst

        # Setting conditions
        condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
                                          bias_voltage=bias_voltage,
                                          acquisition_rate=acq_rate,
                                          gain_setting=gain_setting)
        device = getattr(detinst, qm)
        data, mdata = get_from_db(device,
                                  getattr(Constants.AGIPD, const)(),
                                  condition,
                                  None,
                                  cal_db_interface, creation_time=creation_time,
                                  verbosity=2, timeout=cal_db_timeout)

        old_const[const] = data
        if mdata is not None and data is not None:
            time = mdata.calibration_constant_version.begin_at
            old_mdata[const] = time.isoformat()
            os.makedirs('{}/old/'.format(out_folder), exist_ok=True)
            save_const_to_h5(mdata, '{}/old/'.format(out_folder))
        else:
            old_mdata[const] = "Not found"
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
 ```

 %% Cell type:code id: tags:

 ``` python
 for qm in res:

-    print("Injecting constants with conditions:\n".format(const))
+    print(f"Injecting constants {const} with conditions:\n")
    print("1. memory_cells: {}\n2. bias_voltage: {}\n"
              "3. acquisition_rate: {}\n4. gain_setting: {}\n"
              "5. creation_time: {}\n".format(max_cells, bias_voltage,
                                           acq_rate, gain_setting,
                                           creation_time))
    for const in res[qm]:
        metadata = ConstantMetaData()
        dconst = getattr(Constants.AGIPD, const)()
        dconst.data = res[qm][const]
        metadata.calibration_constant = dconst

        # set the operating condition
        condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
                                          bias_voltage=bias_voltage,
                                          acquisition_rate=acq_rate,
                                          gain_setting=gain_setting)
        detinst = getattr(Detectors, dinstance)
        device = getattr(detinst, qm)

        metadata.detector_condition = condition

        # specify the a version for this constant
        if creation_time is None:
            metadata.calibration_constant_version = Versions.Now(device=device)
        else:
            metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                                      start=creation_time)

        metadata.calibration_constant_version.raw_data_location = file_loc

        if db_output:
            try:
                metadata.send(cal_db_interface, timeout=cal_db_timeout)
                msg = 'Const {} for module {} was injected to the calibration DB. Begin at: {}'
                print(msg.format(const, qm,
                                 metadata.calibration_constant_version.begin_at))
            except Exception as e:
                print(e)

        if local_output:
            save_const_to_h5(metadata, out_folder)
            print("Calibration constant {} is stored locally.\n".format(const))
 ```

 %% Cell type:code id: tags:

 ``` python
 qm = "Q{}M{}".format(modules[0]//4+1, modules[0] % 4+1)
 display(Markdown('## Position of the module {} and it\'s ASICs##'.format(qm)))

 fig, ax = plt.subplots(1, figsize=(10, 10))
 ax.set_axis_off()

 ax.set_xlim(0, 90)
 ax.set_ylim(0, 75)

 asic_pos = 5

 q_st = 8

 l_y = 6
 l_x = 5

 for iq, q_x in enumerate([[43,66],[45,34],[4,32],[2,64]]):
    for im in range(4):
        x = q_x[0]
        for i_as in range(8):
            ax.add_patch(matplotlib.patches.Rectangle((x,q_x[1]-q_st*im), l_x, l_y, linewidth=2, edgecolor='c',
                                           facecolor='lightblue', fill=True))

            if iq*4+im == modules[0]:
                ax.add_patch(matplotlib.patches.Rectangle((x,q_x[1]-q_st*im), l_x, l_y/2, linewidth=2,edgecolor='c',
                                           facecolor='sandybrown', fill=True))
                ax.add_patch(matplotlib.patches.Rectangle((x,(q_x[1]-q_st*im+3)), l_x, l_y/2, linewidth=2,edgecolor='c',
                                           facecolor='sandybrown', fill=True))
            x += asic_pos

        if iq*4+im == modules[0]:
            # Change Text for current processed module.
            ax.text(q_x[0]+13, q_x[1]-q_st*im+1.5, 'Q{}M{}'.format(
                iq+1, im+1),  fontsize=28, color='mediumblue',weight='bold')
        else:
            ax.text(q_x[0]+14, q_x[1]-q_st*im+1.5, 'Q{}M{}'.format(
                                        iq+1, im+1),  fontsize=25, color='k')
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single cell overviews allow to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 0
 out_folder = None
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 1
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 2
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ## Global Bad Pixel Behaviour ##

 The following plots show the results of bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2. This excludes single bad pixels present only in disconnected pixels. Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated. Colors encode the bad pixel type, or mixed type.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}

 rebin = 8 if not high_res_badpix_3d else 2

 gain = 0
 for mod, data in badpix_g.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 gain = 1
 for mod, data in badpix_g.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 gain = 2
 for mod, data in badpix_g.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
-create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 10000,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()),
-                         badpixels=[badpix_g, np.nan])
+create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 8000,
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()),
+                         badpixels=[badpix_g, np.nan]))
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()),
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()),
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 # Plot only three gain threshold maps.
 bp_thresh = OrderedDict()
 for mod, con in badpix_g.items():
    bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
    bp_thresh[mod][...,:2] = con[...,:2]
    bp_thresh[mod][...,2:] = con


 create_constant_overview(thresholds_g, "Threshold (ADU)", max_cells, 4000, 10000, 5,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()),
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()),
                         badpixels=[bp_thresh, np.nan],
                         gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
                         marker=['d','d','','','']
                         )
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, 0, 0.10, 3,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()))
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables ##

 The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.

 %% Cell type:code id: tags:

 ``` python
 table = []
 gain_names = ['High', 'Medium', 'Low']
 for qm in badpix_g.keys():
    for gain in range(3):

        l_data = []
        l_data_old = []

        data = np.copy(badpix_g[qm][:,:,:,gain])
        datau32 = data.astype(np.uint32)
        l_data.append(data)
        l_data.append(datau32 - np.bitwise_or(datau32,BadPixels.NOISE_OUT_OF_THRESHOLD.value))
        l_data.append(datau32 - np.bitwise_or(datau32,BadPixels.OFFSET_OUT_OF_THRESHOLD.value))
        l_data.append(datau32 - np.bitwise_or(datau32,BadPixels.OFFSET_NOISE_EVAL_ERROR.value))

        if old_const['BadPixelsDark'] is not None:
            dataold = np.copy(old_const['BadPixelsDark'][:, :, :, gain])
            datau32old = dataold.astype(np.uint32)
            l_data_old.append(dataold)
            l_data_old.append(datau32old - np.bitwise_or(datau32old,BadPixels.NOISE_OUT_OF_THRESHOLD.value))
            l_data_old.append(datau32old - np.bitwise_or(datau32old,BadPixels.OFFSET_OUT_OF_THRESHOLD.value))
            l_data_old.append(datau32old - np.bitwise_or(datau32old,BadPixels.OFFSET_NOISE_EVAL_ERROR.value))

        l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
                       'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']

        l_threshold = ['', '{}'.format(thresholds_noise_sigma), '{}'.format(thresholds_offset_sigma),
                      '{}/{}'.format(thresholds_offset_hard, thresholds_noise_hard)]

        for i in range(len(l_data)):
            line = ['{}, {} gain'.format(l_data_name[i], gain_names[gain]),
                      l_threshold[i],
                      len(l_data[i][l_data[i]>0].flatten())
                   ]
            if old_const['BadPixelsDark'] is not None:
                line += [len(l_data_old[i][l_data_old[i]>0].flatten())]
            else:
                line += ['-']

            table.append(line)
        table.append(['', '', '', ''])

 display(Markdown('### Number of bad pixels ###'.format(qm)))
 if len(table)>0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Pixel type", "Threshold", "New constant", "Old constant "])))
 ```

 %% Cell type:code id: tags:

 ``` python

 header = ['Parameter',
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant "]

 for const in ['Offset', 'Noise', 'ThresholdsDark']:
    if const != 'ThresholdsDark':
        table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
    else:
        table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
    for qm in res.keys():

        data = np.copy(res[qm][const])
        if const == 'ThresholdsDark':
            data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
            data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
        else:
            data[res[qm]['BadPixelsDark']>0] = np.nan

        if old_const[const] is not None and old_const['BadPixelsDark'] is not None:
            dataold = np.copy(old_const[const])
            if const == 'ThresholdsDark':
                dataold[...,0][old_const['BadPixelsDark'][...,0]>0] = np.nan
                dataold[...,1][old_const['BadPixelsDark'][...,1]>0] = np.nan
            else:
                dataold[old_const['BadPixelsDark']>0] = np.nan

        f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
        n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']

        for i, f in enumerate(f_list):
            line = [n_list[i]]
            for gain in range(3):
                # Compare only 3 threshold gain-maps
                if gain == 2 and const == 'ThresholdsDark':
                    continue
                line.append('{:6.1f}'.format(f(data[...,gain])))
                if old_const[const] is not None and old_const['BadPixelsDark'] is not None:
                    line.append('{:6.1f}'.format(f(dataold[...,gain])))
                else:
                    line.append('-')

            table.append(line)

    display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 # Characterize Dark Images #

 Author: S. Hauf, Version: 0.1

 The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets , noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.

 The evaluated calibration constants are stored locally and injected in the calibration data base.

 %% Cell type:code id: tags:

 ``` python
 cluster_profile = "noDB" # The ipcluster profile to use
 in_folder = "/gpfs/exfel/d/raw/SPB/202030/p900138/" # path to input data, required
 out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD3" # path to output to, required
 sequences = [0] # sequence files to evaluate.
+modules = [-1]  # list of modules to evaluate, RANGE ALLOWED
+run_high = 264 # run number in which high gain data was recorded, required
+run_med = 265 # run number in which medium gain data was recorded, required
+run_low = 266 # run number in which low gain data was recorded, required
+
+karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
+karabo_da = [-1]  # data aggregators
+receiver_id = "{}CH0" # inset for receiver devices
+path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
+h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
+karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
+karabo_da_control = 'DA02' # karabo DA for control infromation

-run_high = 199 # run number in which high gain data was recorded, required
-run_med = 200 # run number in which medium gain data was recorded, required
-run_low = 201 # run number in which low gain data was recorded, required
-
-mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
+use_dir_creation_date = True  # use dir creation date as data production reference date
+cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
+cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
+
+mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 300 # detector bias voltage
-cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
-cal_db_timeout = 3000000 # timeout on caldb requests"
+gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
+acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 interlaced = False # assume interlaced data format, for data prior to Dec. 2017
 rawversion = 2 # RAW file format version
-dont_use_dir_date = False # don't use the dir creation date for determining the creation time

 thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_noise_sigma = 5. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for offset deduced bad pixels

-instrument = "SPB"
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
-modules = [0]#,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]  # module to run for
-acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
-gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
-
-h5path_ctrl = '/CONTROL/SPB_IRU_AGIPD1M1/MDL/FPGA_COMP_TEST' # path to control information
-karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation
 ```

 %% Cell type:code id: tags:

 ``` python
 # imports and things that do not usually need to be changed
 from datetime import datetime
 import dateutil.parser
 import warnings
 warnings.filterwarnings('ignore')
 from collections import OrderedDict
 import os
 import h5py
 import numpy as np
 import matplotlib
 import tabulate

 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 from IPython.display import display, Markdown, Latex
 %matplotlib inline

-from cal_tools.tools import (gain_map_files, parse_runs,
+from cal_tools.tools import (map_gain_stages, parse_runs,
                             run_prop_seq_from_path, get_notebook_name,
                             get_dir_creation_date, save_const_to_h5,
                             get_random_db_interface, get_from_db)
 from cal_tools.influx import InfluxLogger
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview
 from cal_tools.agipdlib import get_gain_setting

 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 from ipyparallel import Client

 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 gains = np.arange(3)

-# no need to change this
-
-QUADRANTS = 4
-MODULES_PER_QUAD = 4
-DET_FILE_INSET = "AGIPD"
-
 IL_MODE = interlaced
 max_cells = mem_cells

 offset_runs = OrderedDict()
-offset_runs["high"] = parse_runs(run_high)[0]
-offset_runs["med"] = parse_runs(run_med)[0]
-offset_runs["low"] = parse_runs(run_low)[0]
+offset_runs["high"] = run_high
+offset_runs["med"] = run_med
+offset_runs["low"] = run_low

 creation_time=None
-if not dont_use_dir_date:
+if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
-
+print(f"Using {creation_time} as creation time of constant.")

 run, prop, seq = run_prop_seq_from_path(in_folder)
-logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
-                      notebook=get_notebook_name(), proposal=prop)
-
-print("Using {} as creation time of constant.".format(creation_time))

 cal_db_interface = get_random_db_interface(cal_db_interface)
-print('Calibration database interface: {}'.format(cal_db_interface))
+print(f'Calibration database interface: {cal_db_interface}')

 loc = None
+instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
-    loc = "SPB_DET_AGIPD1M-1"
    dinstance = "AGIPD1M1"
 else:
-    loc = "MID_DET_AGIPD1M-1"
    dinstance = "AGIPD1M2"
-print("Detector in use is {}".format(loc))
+print(f"Detector in use is {karabo_id}")
+print(f"Instrument {instrument}")
+print(f"Detector instance {dinstance}")
+
+logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
+                      notebook=get_notebook_name(), proposal=prop)
 ```

 %% Cell type:code id: tags:

 ``` python
 control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, run_high,
                                                             run_high, karabo_da_control)

 if gain_setting == 0.1:
    if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):
        print("Set gain-setting to None for runs taken before 2020-01-31")
        gain_setting = None
    else:
        try:
            gain_setting = get_gain_setting(control_fname, h5path_ctrl)
        except Exception as e:
            print(f'Error while reading gain setting from: \n{control_fname}')
            print(e)
            print("Gain setting is not found in the control information")
            print("Data will not be processed")
            sequences = []
 ```

 %% Cell type:code id: tags:

 ``` python
+if karabo_da[0] == -1:
+    if modules[0] == -1:
+        modules = list(range(16))
+    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
+else:
+    modules = [int(x[-2:]) for x in karabo_da]
+
+h5path = h5path.format(karabo_id, receiver_id)
+h5path_idx = h5path_idx.format(karabo_id, receiver_id)
+h5path_ctrl = h5path_ctrl.format(karabo_id_control)
+
 print("Parameters are:")
-print("Proposal: {}".format(prop))
-print("Memory cells: {}/{}".format(mem_cells, max_cells))
+print(f"Proposal: {prop}")
+print(f"Memory cells: {mem_cells}/{max_cells}")
 print("Runs: {}".format([ v for v in offset_runs.values()]))
-print("Sequences: {}".format(sequences))
-print("Interlaced mode: {}".format(IL_MODE))
-print("Using DB: {}".format(db_output))
-print("Input: {}".format(in_folder))
-print("Output: {}".format(out_folder))
-print("Bias voltage: {}V".format(bias_voltage))
-print("Gain setting: {}".format(gain_setting))
+print(f"Sequences: {sequences}")
+print(f"Interlaced mode: {IL_MODE}")
+print(f"Using DB: {db_output}")
+print(f"Input: {in_folder}")
+print(f"Output: {out_folder}")
+print(f"Bias voltage: {bias_voltage}V")
+print(f"Gain setting: {gain_setting}")
 ```

 %% Cell type:markdown id: tags:

 The following lines will create a queue of files which will the be executed module-parallel. Distiguishing between different gains.

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
-if not os.path.exists(out_folder):
-    os.makedirs(out_folder)
-
-gmf = gain_map_files(in_folder, offset_runs, sequences, DET_FILE_INSET, QUADRANTS, MODULES_PER_QUAD)
+os.makedirs(out_folder, exist_ok=True)
+gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
 gain_mapped_files, total_sequences, total_file_size = gmf
-
-print("Will process at total of {} sequences: {:0.2f} GB of data.".format(total_sequences, total_file_size))
+print(f"Will process a total of {total_sequences} sequences.")
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 import copy
 from functools import partial
-def characterize_module(il_mode, cells, bp_thresh, rawversion, loc, acq_rate, inp):
+def characterize_module(il_mode, cells, bp_thresh, rawversion, loc, acq_rate,
+                        h5path, h5path_idx, inp):
    import numpy as np
    import copy
    import h5py
    from cal_tools.enums import BadPixels
    from cal_tools.agipdlib import get_num_cells, get_acq_rate

    filename, filename_out, channel = inp

-
    if cells == 0:
        cells = get_num_cells(filename, loc, channel)

-    print("Using {} memory cells".format(cells))
+    print(f"Using {cells} memory cells")

    if acq_rate == 0.:
        acq_rate = get_acq_rate(filename, loc, channel)

    thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh

    infile = h5py.File(filename, "r", driver="core")
+
+    h5path = h5path.format(channel)
+    h5path_idx = h5path_idx.format(channel)
+
    if rawversion == 2:
-        count = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        count = np.squeeze(infile[f"{h5path_idx}/count"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(first[count != 0][-1]+count[count != 0][-1])
        first_index = int(first[count != 0][0])
    else:
-        status = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(loc, channel)])
+        status = np.squeeze(infile[f"{h5path_idx}/status"])
        if np.count_nonzero(status != 0) == 0:
            return
-        last = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/last".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        last = np.squeeze(infile[f"{h5path_idx}/last"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(last[status != 0][-1]) + 1
        first_index = int(first[status != 0][0])
-    im = np.array(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data".format(loc, channel)][first_index:last_index,...])
-    cellIds = np.squeeze(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, channel)][first_index:last_index,...])
+    im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
+    cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])

    infile.close()

    if il_mode:
        ga = im[1::2, 0, ...]
        im = im[0::2, 0, ...].astype(np.float32)
        cellIds = cellIds[::2]
    else:
        ga = im[:, 1, ...]
        im = im[:, 0, ...].astype(np.float32)

    im = np.rollaxis(im, 2)
    im = np.rollaxis(im, 2, 1)

    ga = np.rollaxis(ga, 2)
    ga = np.rollaxis(ga, 2, 1)

    mcells = cells #max(cells, np.max(cellIds)+1)
    offset = np.zeros((im.shape[0], im.shape[1], mcells))
    gains = np.zeros((im.shape[0], im.shape[1], mcells))
    noise = np.zeros((im.shape[0], im.shape[1], mcells))

    for cc in np.unique(cellIds[cellIds < mcells]):
        cellidx = cellIds == cc
        offset[...,cc] = np.median(im[..., cellidx], axis=2)
        noise[...,cc] = np.std(im[..., cellidx], axis=2)
        gains[...,cc] = np.median(ga[..., cellidx], axis=2)

    # bad pixels
    bp = np.zeros(offset.shape, np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[(offset < thresholds_offset_hard[0]) | (
        offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))

    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value


    return offset, noise, gains, bp, cells, acq_rate


 offset_g = OrderedDict()
 noise_g = OrderedDict()
 gain_g = OrderedDict()
 badpix_g = OrderedDict()
 gg = 0

 start = datetime.now()
 all_cells = []
 all_acq_rate = []
 for gain, mapped_files in gain_mapped_files.items():

    inp = []
    dones = []
    for i in modules:
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = mapped_files[qm].get()
            dones.append(mapped_files[qm].empty())
        else:
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        inp.append((fname_in, fout, i))
    first = False
    p = partial(characterize_module, IL_MODE, max_cells,
               (thresholds_offset_hard, thresholds_offset_sigma,
-                thresholds_noise_hard, thresholds_noise_sigma), rawversion, loc, acq_rate)
-    results = list(map(p, inp))
-    #results = view.map_sync(p, inp)
+                thresholds_noise_hard, thresholds_noise_sigma),
+                rawversion, karabo_id, acq_rate, h5path, h5path_idx)
+    #results = list(map(p, inp))
+    results = view.map_sync(p, inp)
    for ii, r in enumerate(results):
        i = modules[ii]
        offset, noise, gain, bp, thiscell, thisacq = r
        all_cells.append(thiscell)
        all_acq_rate.append(thisacq)
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm not in offset_g:
            offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
            noise_g[qm] = np.zeros_like(offset_g[qm])
            gain_g[qm] = np.zeros_like(offset_g[qm])
            badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)

        offset_g[qm][...,gg] = offset
        noise_g[qm][...,gg] = noise
        gain_g[qm][...,gg] = gain
        badpix_g[qm][...,gg] = bp
    gg +=1

 duration = (datetime.now()-start).total_seconds()
 logger.runtime_summary_entry(success=True, runtime=duration,
                                   total_sequences=total_sequences,
                                   filesize=total_file_size)
 logger.send()
 max_cells = np.max(all_cells)
-print("Using {} memory cells".format(max_cells))
+print(f"Using {max_cells} memory cells")
 acq_rate = np.max(all_acq_rate)
-print("Using {} MHz acquisition rate".format(acq_rate))
+print(f"Using {acq_rate} MHz acquisition rate")
 ```

 %% Cell type:markdown id: tags:

 The thresholds for gain switching are then defined as the mean value between in individual gain bit levels. Note that these thresholds need to be refined with charge induced thresholds, as the two are not the same.

 %% Cell type:code id: tags:

 ``` python
 thresholds_g = {}
 for qm in gain_g.keys():
    thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
    thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
    thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
    for i in range(3):
        thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in modules:
    qm = "Q{}M{}".format(i//4+1, i%4+1)
    res[qm] = {'Offset': offset_g[qm],
               'Noise': noise_g[qm],
               'ThresholdsDark': thresholds_g[qm],
               'BadPixelsDark': badpix_g[qm]
               }

 if local_output:
    for qm in offset_g.keys():
-        ofile = "{}/agipd_offset_store_{}_{}.h5".format(out_folder, "_".join(offset_runs.values()), qm)
+        ofile = "{}/agipd_offset_store_{}_{}.h5".format(out_folder,
+                                                        "{}-{}-{}".format(*offset_runs.values()),
+                                                        qm)
        store_file = h5py.File(ofile, "w")
        store_file["{}/Offset/0/data".format(qm)] = offset_g[qm]
        store_file["{}/Noise/0/data".format(qm)] = noise_g[qm]
        store_file["{}/Threshold/0/data".format(qm)] = thresholds_g[qm]
        store_file["{}/BadPixels/0/data".format(qm)] = badpix_g[qm]
        store_file.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 # Retrieve existing constants for comparison
 clist = ["Offset", "Noise", "ThresholdsDark", "BadPixelsDark"]
 old_const = {}
 old_mdata = {}
 detinst = getattr(Detectors, dinstance)

 print('Retrieve pre-existing constants for comparison.')

 for qm in res:
    for const in res[qm]:
        metadata = ConstantMetaData()
        dconst = getattr(Constants.AGIPD, const)()
        dconst.data = res[qm][const]
        metadata.calibration_constant = dconst

        # Setting conditions
        condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
                                          bias_voltage=bias_voltage,
                                          acquisition_rate=acq_rate,
                                          gain_setting=gain_setting)
        device = getattr(detinst, qm)
        data, mdata = get_from_db(device,
                                  getattr(Constants.AGIPD, const)(),
                                  condition,
                                  None,
                                  cal_db_interface, creation_time=creation_time,
                                  verbosity=2, timeout=cal_db_timeout)

        old_const[const] = data
        if mdata is not None and data is not None:
            time = mdata.calibration_constant_version.begin_at
            old_mdata[const] = time.isoformat()
            os.makedirs('{}/old/'.format(out_folder), exist_ok=True)
            save_const_to_h5(mdata, '{}/old/'.format(out_folder))
        else:
            old_mdata[const] = "Not found"
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
 ```

 %% Cell type:code id: tags:

 ``` python
 for qm in res:

-    print("Injecting constants with conditions:\n".format(const))
+    print(f"Injecting constants {const} with conditions:\n")
    print("1. memory_cells: {}\n2. bias_voltage: {}\n"
              "3. acquisition_rate: {}\n4. gain_setting: {}\n"
              "5. creation_time: {}\n".format(max_cells, bias_voltage,
                                           acq_rate, gain_setting,
                                           creation_time))
    for const in res[qm]:
        metadata = ConstantMetaData()
        dconst = getattr(Constants.AGIPD, const)()
        dconst.data = res[qm][const]
        metadata.calibration_constant = dconst

        # set the operating condition
        condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
                                          bias_voltage=bias_voltage,
                                          acquisition_rate=acq_rate,
                                          gain_setting=gain_setting)
        detinst = getattr(Detectors, dinstance)
        device = getattr(detinst, qm)

        metadata.detector_condition = condition

        # specify the a version for this constant
        if creation_time is None:
            metadata.calibration_constant_version = Versions.Now(device=device)
        else:
            metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                                      start=creation_time)

        metadata.calibration_constant_version.raw_data_location = file_loc

        if db_output:
            try:
                metadata.send(cal_db_interface, timeout=cal_db_timeout)
                msg = 'Const {} for module {} was injected to the calibration DB. Begin at: {}'
                print(msg.format(const, qm,
                                 metadata.calibration_constant_version.begin_at))
            except Exception as e:
                print(e)

        if local_output:
            save_const_to_h5(metadata, out_folder)
            print("Calibration constant {} is stored locally.\n".format(const))
 ```

 %% Cell type:code id: tags:

 ``` python
 qm = "Q{}M{}".format(modules[0]//4+1, modules[0] % 4+1)
 display(Markdown('## Position of the module {} and it\'s ASICs##'.format(qm)))

 fig, ax = plt.subplots(1, figsize=(10, 10))
 ax.set_axis_off()

 ax.set_xlim(0, 90)
 ax.set_ylim(0, 75)

 asic_pos = 5

 q_st = 8

 l_y = 6
 l_x = 5

 for iq, q_x in enumerate([[43,66],[45,34],[4,32],[2,64]]):
    for im in range(4):
        x = q_x[0]
        for i_as in range(8):
            ax.add_patch(matplotlib.patches.Rectangle((x,q_x[1]-q_st*im), l_x, l_y, linewidth=2, edgecolor='c',
                                           facecolor='lightblue', fill=True))

            if iq*4+im == modules[0]:
                ax.add_patch(matplotlib.patches.Rectangle((x,q_x[1]-q_st*im), l_x, l_y/2, linewidth=2,edgecolor='c',
                                           facecolor='sandybrown', fill=True))
                ax.add_patch(matplotlib.patches.Rectangle((x,(q_x[1]-q_st*im+3)), l_x, l_y/2, linewidth=2,edgecolor='c',
                                           facecolor='sandybrown', fill=True))
            x += asic_pos

        if iq*4+im == modules[0]:
            # Change Text for current processed module.
            ax.text(q_x[0]+13, q_x[1]-q_st*im+1.5, 'Q{}M{}'.format(
                iq+1, im+1),  fontsize=28, color='mediumblue',weight='bold')
        else:
            ax.text(q_x[0]+14, q_x[1]-q_st*im+1.5, 'Q{}M{}'.format(
                                        iq+1, im+1),  fontsize=25, color='k')
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single cell overviews allow to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 0
 out_folder = None
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 1
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 cell = 3
 gain = 2
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ## Global Bad Pixel Behaviour ##

 The following plots show the results of bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2. This excludes single bad pixels present only in disconnected pixels. Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated. Colors encode the bad pixel type, or mixed type.

 %% Cell type:markdown id: tags:

 ### High Gain ###

 %% Cell type:code id: tags:

 ``` python
 cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}

 rebin = 8 if not high_res_badpix_3d else 2

 gain = 0
 for mod, data in badpix_g.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ### Medium Gain ###

 %% Cell type:code id: tags:

 ``` python
 gain = 1
 for mod, data in badpix_g.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ### Low Gain ###

 %% Cell type:code id: tags:

 ``` python
 gain = 2
 for mod, data in badpix_g.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
-create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 10000,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()),
-                         badpixels=[badpix_g, np.nan])
+create_constant_overview(offset_g, "Offset (ADU)", max_cells, 4000, 8000,
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()),
+                         badpixels=[badpix_g, np.nan]))
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()),
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()),
                         badpixels=[badpix_g, np.nan])
 ```

 %% Cell type:code id: tags:

 ``` python
 # Plot only three gain threshold maps.
 bp_thresh = OrderedDict()
 for mod, con in badpix_g.items():
    bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
    bp_thresh[mod][...,:2] = con[...,:2]
    bp_thresh[mod][...,2:] = con


 create_constant_overview(thresholds_g, "Threshold (ADU)", max_cells, 4000, 10000, 5,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()),
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()),
                         badpixels=[bp_thresh, np.nan],
                         gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
                         marker=['d','d','','','']
                         )
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, 0, 0.10, 3,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()))
+                         out_folder=out_folder, infix="{}-{}-{}".format(*offset_runs.values()))
 ```

 %% Cell type:markdown id: tags:

 ## Summary tables ##

 The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.

 %% Cell type:code id: tags:

 ``` python
 table = []
 gain_names = ['High', 'Medium', 'Low']
 for qm in badpix_g.keys():
    for gain in range(3):

        l_data = []
        l_data_old = []

        data = np.copy(badpix_g[qm][:,:,:,gain])
        datau32 = data.astype(np.uint32)
        l_data.append(data)
        l_data.append(datau32 - np.bitwise_or(datau32,BadPixels.NOISE_OUT_OF_THRESHOLD.value))
        l_data.append(datau32 - np.bitwise_or(datau32,BadPixels.OFFSET_OUT_OF_THRESHOLD.value))
        l_data.append(datau32 - np.bitwise_or(datau32,BadPixels.OFFSET_NOISE_EVAL_ERROR.value))

        if old_const['BadPixelsDark'] is not None:
            dataold = np.copy(old_const['BadPixelsDark'][:, :, :, gain])
            datau32old = dataold.astype(np.uint32)
            l_data_old.append(dataold)
            l_data_old.append(datau32old - np.bitwise_or(datau32old,BadPixels.NOISE_OUT_OF_THRESHOLD.value))
            l_data_old.append(datau32old - np.bitwise_or(datau32old,BadPixels.OFFSET_OUT_OF_THRESHOLD.value))
            l_data_old.append(datau32old - np.bitwise_or(datau32old,BadPixels.OFFSET_NOISE_EVAL_ERROR.value))

        l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
                       'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']

        l_threshold = ['', '{}'.format(thresholds_noise_sigma), '{}'.format(thresholds_offset_sigma),
                      '{}/{}'.format(thresholds_offset_hard, thresholds_noise_hard)]

        for i in range(len(l_data)):
            line = ['{}, {} gain'.format(l_data_name[i], gain_names[gain]),
                      l_threshold[i],
                      len(l_data[i][l_data[i]>0].flatten())
                   ]
            if old_const['BadPixelsDark'] is not None:
                line += [len(l_data_old[i][l_data_old[i]>0].flatten())]
            else:
                line += ['-']

            table.append(line)
        table.append(['', '', '', ''])

 display(Markdown('### Number of bad pixels ###'.format(qm)))
 if len(table)>0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Pixel type", "Threshold", "New constant", "Old constant "])))
 ```

 %% Cell type:code id: tags:

 ``` python

 header = ['Parameter',
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant "]

 for const in ['Offset', 'Noise', 'ThresholdsDark']:
    if const != 'ThresholdsDark':
        table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
    else:
        table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
    for qm in res.keys():

        data = np.copy(res[qm][const])
        if const == 'ThresholdsDark':
            data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
            data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
        else:
            data[res[qm]['BadPixelsDark']>0] = np.nan

        if old_const[const] is not None and old_const['BadPixelsDark'] is not None:
            dataold = np.copy(old_const[const])
            if const == 'ThresholdsDark':
                dataold[...,0][old_const['BadPixelsDark'][...,0]>0] = np.nan
                dataold[...,1][old_const['BadPixelsDark'][...,1]>0] = np.nan
            else:
                dataold[old_const['BadPixelsDark']>0] = np.nan

        f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
        n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']

        for i, f in enumerate(f_list):
            line = [n_list[i]]
            for gain in range(3):
                # Compare only 3 threshold gain-maps
                if gain == 2 and const == 'ThresholdsDark':
                    continue
                line.append('{:6.1f}'.format(f(data[...,gain])))
                if old_const[const] is not None and old_const['BadPixelsDark'] is not None:
                    line.append('{:6.1f}'.format(f(dataold[...,gain])))
                else:
                    line.append('-')

            table.append(line)

    display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

--- a/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb
+++ b/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Characterize Dark Images #

 Author: S. Hauf, Version: 0.1

 The following code analyzes a set of dark images taken with the DSSC detector to deduce detector offsets and noise. Data for the detector is presented in one run and don't acquire multiple gain stages.

 The notebook explicitely does what pyDetLib provides in its offset calculation method for streaming data.

 %% Cell type:code id: tags:

 ``` python
 cluster_profile = "noDB" # The ipcluster profile to use
 in_folder = "/gpfs/exfel/exp/SCS/201931/p900095/raw" # path to input data, required
 out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/DSSC" # path to output to, required
 sequences = [0] # sequence files to evaluate.
-
+modules = [-1]  # modules to run for
 run = 1497 # run number in which data was recorded, required

-mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
+karabo_id = "SCS_DET_DSSC1M-1" # karabo karabo_id
+karabo_da = [-1]  # data aggregators
+receiver_id = "{}CH0" # inset for receiver devices
+path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
+h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+
+dont_use_dir_date = True # don't use the dir creation date for determining the creation time
+cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
+cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
+
+mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 300 # detector bias voltage
-cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
 rawversion = 2 # RAW file format version
-dont_use_dir_date = True # don't use the dir creation date for determining the creation time

 thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_noise_sigma = 5. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for offset deduced bad pixels

 instrument = "SCS" # the instrument
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
-modules = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]  # modules to run for
 ```

 %% Cell type:code id: tags:

 ``` python
 # imports and things that do not usually need to be changed
 from datetime import datetime
 import warnings
 warnings.filterwarnings('ignore')
 from collections import OrderedDict
 import os
 import h5py
 import numpy as np
 import matplotlib
 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 %matplotlib inline

-from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path,
+from cal_tools.tools import (map_gain_stages, parse_runs, run_prop_seq_from_path,
                             get_notebook_name, get_dir_creation_date,
                             get_random_db_interface)
 from cal_tools.influx import InfluxLogger
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview

 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 from ipyparallel import Client

 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions


-# no need to change this
+h5path = h5path.format(karabo_id, receiver_id)
+h5path_idx = h5path_idx.format(karabo_id, receiver_id)

-QUADRANTS = 4
-MODULES_PER_QUAD = 4
-DET_FILE_INSET = "DSSC"
+
+if karabo_da[0] == -1:
+    if modules[0] == -1:
+        modules = list(range(16))
+    karabo_da = ["DSSC{:02d}".format(i) for i in modules]
+else:
+    modules = [int(x[-2:]) for x in karabo_da]

 max_cells = mem_cells

 offset_runs = OrderedDict()
-offset_runs["high"] = parse_runs(run)[0]
+offset_runs["high"] = run

 creation_time=None
 if not dont_use_dir_date:
    creation_time = get_dir_creation_date(in_folder, run)


 run, prop, seq = run_prop_seq_from_path(in_folder)
 logger = InfluxLogger(detector="DSSC", instrument=instrument, mem_cells=mem_cells,
                      notebook=get_notebook_name(), proposal=prop)

-print("Using {} as creation time of constant.".format(creation_time))
+print(f"Using {creation_time} as creation time of constant.")

-loc = None
-if instrument == "SCS":
-    loc = "SCS_DET_DSSC1M-1"
-    dinstance = "DSSC1M1"
+dinstance = "DSSC1M1"

-print("Detector in use is {}".format(loc))
+print(f"Detector in use is {karabo_id}")

 cal_db_interface = get_random_db_interface(cal_db_interface)
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Parameters are:")
-print("Proposal: {}".format(prop))
-print("Memory cells: {}/{}".format(mem_cells, max_cells))
+print(f"Proposal: {prop}")
+print(f"Memory cells: {mem_cells}/{max_cells}")
 print("Runs: {}".format([ v for v in offset_runs.values()]))
-print("Sequences: {}".format(sequences))
-print("Using DB: {}".format(db_output))
-print("Input: {}".format(in_folder))
-print("Output: {}".format(out_folder))
-print("Bias voltage: {}V".format(bias_voltage))
+print(f"Sequences: {sequences}")
+print(f"Using DB: {db_output}")
+print(f"Input: {in_folder}")
+print(f"Output: {out_folder}")
+print(f"Bias voltage: {bias_voltage}V")
 ```

 %% Cell type:markdown id: tags:

 The following lines will create a queue of files which will the be executed module-parallel. Distinguishing between different gains.

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
-if not os.path.exists(out_folder):
-    os.makedirs(out_folder)
-
-gmf = gain_map_files(in_folder, offset_runs, sequences, DET_FILE_INSET, QUADRANTS, MODULES_PER_QUAD)
+os.makedirs(out_folder, exist_ok=True)
+gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
 gain_mapped_files, total_sequences, total_file_size = gmf
-print("Will process at total of {} sequences: {:0.2f} GB of data.".format(total_sequences, total_file_size))
+print(f"Will process a total of {total_sequences} sequences.")
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 import copy
 from functools import partial
-def characterize_module(cells, bp_thresh, rawversion, loc, inp):
+def characterize_module(cells, bp_thresh, rawversion, karabo_id, h5path, h5path_idx, inp):
    import numpy as np
    import copy
    import h5py
    from cal_tools.enums import BadPixels

    from hashlib import blake2b
    import struct
    import binascii

-    def get_num_cells(fname, loc, module):
+    def get_num_cells(fname, h5path):
        with h5py.File(fname, "r") as f:

-            cells = f["INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, module)][()]
+            cells = f[f"{h5path}/cellId"][()]
            maxcell = np.max(cells)
            options = [100, 200, 400, 500, 600, 700, 800]
            dists = np.array([(o-maxcell) for o in options])
            dists[dists<0] = 10000 # assure to always go higher
            return options[np.argmin(dists)]

    filename, filename_out, channel = inp

+    h5path = h5path.format(channel)
+    h5path_idx = h5path_idx.format(channel)

    if cells == 0:
-        cells = get_num_cells(filename, loc, channel)
+        cells = get_num_cells(filename, h5path)

    pulseid_checksum = None

    thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh

    infile = h5py.File(filename, "r", driver="core")
    if rawversion == 2:
-        count = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        count = np.squeeze(infile[f"{h5path_idx}/count"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(first[count != 0][-1]+count[count != 0][-1])
        first_index = int(first[count != 0][0])
-        pulseids = infile["INSTRUMENT/{}/DET/{}CH0:xtdf/image/pulseId".format(loc, channel)][first_index:int(first[count != 0][1])]
+        pulseids = infile[f"{h5path}/pulseId"][first_index:int(first[count != 0][1])]
        bveto = blake2b(pulseids.data, digest_size=8)
        pulseid_checksum = struct.unpack('d', binascii.unhexlify(bveto.hexdigest()))[0]
    else:
-        status = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(loc, channel)])
+        status = np.squeeze(infile[f"{h5path_idx}/status"])
        if np.count_nonzero(status != 0) == 0:
            return
-        last = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/last".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        last = np.squeeze(infile[f"{h5path_idx}/last"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(last[status != 0][-1]) + 1
        first_index = int(first[status != 0][0])
-    im = np.array(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data".format(loc, channel)][first_index:last_index,...])
-    cellIds = np.squeeze(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, channel)][first_index:last_index,...])
+    im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
+    cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])

    infile.close()


    im = im[:, 0, ...].astype(np.float32)

    im = np.rollaxis(im, 2)
    im = np.rollaxis(im, 2, 1)

    mcells = cells
    offset = np.zeros((im.shape[0], im.shape[1], mcells))
    noise = np.zeros((im.shape[0], im.shape[1], mcells))

    for cc in np.unique(cellIds[cellIds < mcells]):
        cellidx = cellIds == cc
        offset[...,cc] = np.median(im[..., cellidx], axis=2)
        noise[...,cc] = np.std(im[..., cellidx], axis=2)


    # bad pixels
    bp = np.zeros(offset.shape, np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[(offset < thresholds_offset_hard[0]) | (offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))

    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value


    return offset, noise, bp, cells, pulseid_checksum


 offset_g = OrderedDict()
 noise_g = OrderedDict()
 gain_g = OrderedDict()
 badpix_g = OrderedDict()
 gg = 0

 start = datetime.now()
 all_cells = []
 checksums = {}

 for gain, mapped_files in gain_mapped_files.items():

    inp = []
    dones = []
    for i in modules:
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = mapped_files[qm].get()
            dones.append(mapped_files[qm].empty())
        else:
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        inp.append((fname_in, fout, i))
    first = False
    p = partial(characterize_module, max_cells,
               (thresholds_offset_hard, thresholds_offset_sigma,
-                thresholds_noise_hard, thresholds_noise_sigma), rawversion, loc)
+                thresholds_noise_hard, thresholds_noise_sigma), rawversion, karabo_id, h5path, h5path_idx)
    results = list(map(p, inp))

    for ii, r in enumerate(results):
        i = modules[ii]
        offset, noise,  bp, thiscell, pulseid_checksum = r
        all_cells.append(thiscell)
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm not in offset_g:
            offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2]))
            noise_g[qm] = np.zeros_like(offset_g[qm])

            badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
            checksums[qm] = pulseid_checksum

        offset_g[qm][...] = offset
        noise_g[qm][...] = noise
        badpix_g[qm][...] = bp
    gg +=1

 duration = (datetime.now()-start).total_seconds()
 logger.runtime_summary_entry(success=True, runtime=duration,
                                   total_sequences=total_sequences,
                                   filesize=total_file_size)
 logger.send()
 max_cells = np.max(all_cells)
-print("Using {} memory cells".format(max_cells))
+print(f"Using {max_cells} memory cells")
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in modules:
    qm = "Q{}M{}".format(i//4+1, i%4+1)
    res[qm] = {'Offset': offset_g[qm],
               'Noise': noise_g[qm],
               'BadPixels': badpix_g[qm]
               }

 if local_output:
    for qm in offset_g.keys():
-        ofile = "{}/dssc_offset_store_{}_{}.h5".format(out_folder, "_".join(offset_runs.values()), qm)
+        ofile = "{}/dssc_offset_store_{}_{}.h5".format(out_folder, run, qm)
        store_file = h5py.File(ofile, "w")
        store_file["{}/Offset/0/data".format(qm)] = offset_g[qm]
        store_file["{}/Noise/0/data".format(qm)] = noise_g[qm]
        store_file["{}/BadPixels/0/data".format(qm)] = badpix_g[qm]
        store_file.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 if db_output:
    for dont_use_pulseIds in [True, False]:
        for qm in offset_g.keys():
            try:
                metadata = ConstantMetaData()
                offset = Constants.DSSC.Offset()
                offset.data = offset_g[qm]
                metadata.calibration_constant = offset
                pidsum = None if dont_use_pulseIds else checksums[qm]
                # set the operating condition
                condition = Conditions.Dark.DSSC(memory_cells=max_cells, bias_voltage=bias_voltage,
                                                 pulseid_checksum=pidsum)
                detinst = getattr(Detectors, dinstance)

                device = getattr(detinst, qm)

                metadata.detector_condition = condition

                # specify the a version for this constant
                if creation_time is None:
                    metadata.calibration_constant_version = Versions.Now(device=device)
                else:
                    metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
-                metadata.send(cal_db_interface, timeout=3000000)
+                metadata.send(cal_db_interface, timeout=cal_db_timeout)


                metadata = ConstantMetaData()
                noise = Constants.DSSC.Noise()
                noise.data = noise_g[qm]
                metadata.calibration_constant = noise

                # set the operating condition
                condition = Conditions.Dark.DSSC(memory_cells=max_cells, bias_voltage=bias_voltage,
                                                 pulseid_checksum=pidsum)
                metadata.detector_condition = condition

                # specify the a version for this constant
                if creation_time is None:
                    metadata.calibration_constant_version = Versions.Now(device=device)
                else:
                    metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
-                metadata.send(cal_db_interface, timeout=3000000)
+                metadata.send(cal_db_interface, timeout=cal_db_timeout)

                continue  # no bad pixels yet
                metadata = ConstantMetaData()
                badpixels = Constants.DSSC.BadPixelsDark()
                badpixels.data = badpix_g[qm]
                metadata.calibration_constant = badpixels

                # set the operating condition
                condition = Conditions.Dark.DSSC(memory_cells=max_cells, bias_voltage=bias_voltage,
                                                 pulseid_checksum=pidsum)
                metadata.detector_condition = condition

                # specify the a version for this constant
                if creation_time is None:
                    metadata.calibration_constant_version = Versions.Now(device=device)
                else:
                    metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
-                metadata.send(cal_db_interface, timeout=3000000)
+                metadata.send(cal_db_interface, timeout=cal_db_timeout)
            except Exception as e:
                print(e)
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single cell overviews allow to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:code id: tags:

 ``` python
 for r in res.values():
    del r["BadPixels"]
 ```

 %% Cell type:code id: tags:

 ``` python
 cell = 9
 gain = 0
 out_folder = None
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="_{}".format(run))
 ```

 %% Cell type:markdown id: tags:

 ## Global Bad Pixel Behaviour ##

 The following plots show the results of bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2. This excludes single bad pixels present only in disconnected pixels. Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated. Colors encode the bad pixel type, or mixed type.

 %% Cell type:code id: tags:

 ``` python
 cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}

 rebin = 8 if not high_res_badpix_3d else 2

 gain = 0
 for mod, data in badpix_g.items():
    plot_badpix_3d(data, cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(offset_g, "Offset (ADU)", max_cells,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()), entries=1)
+                         out_folder=out_folder, infix="_{}".format(run), entries=1)
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()), entries=1)
+                         out_folder=out_folder, infix="_{}".format(run), entries=1)
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, entries=1,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()))
+                         out_folder=out_folder, infix="_{}".format(run))
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 # Characterize Dark Images #

 Author: S. Hauf, Version: 0.1

 The following code analyzes a set of dark images taken with the DSSC detector to deduce detector offsets and noise. Data for the detector is presented in one run and don't acquire multiple gain stages.

 The notebook explicitely does what pyDetLib provides in its offset calculation method for streaming data.

 %% Cell type:code id: tags:

 ``` python
 cluster_profile = "noDB" # The ipcluster profile to use
 in_folder = "/gpfs/exfel/exp/SCS/201931/p900095/raw" # path to input data, required
 out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/DSSC" # path to output to, required
 sequences = [0] # sequence files to evaluate.
-
+modules = [-1]  # modules to run for
 run = 1497 # run number in which data was recorded, required

-mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
+karabo_id = "SCS_DET_DSSC1M-1" # karabo karabo_id
+karabo_da = [-1]  # data aggregators
+receiver_id = "{}CH0" # inset for receiver devices
+path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
+h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
+
+dont_use_dir_date = True # don't use the dir creation date for determining the creation time
+cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
+cal_db_timeout = 3000000 # timeout on caldb requests"
 local_output = True # output constants locally
 db_output = False # output constants to database
+
+mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 300 # detector bias voltage
-cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
 rawversion = 2 # RAW file format version
-dont_use_dir_date = True # don't use the dir creation date for determining the creation time

 thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels

 thresholds_noise_sigma = 5. # thresholds in terms of n sigma noise for offset deduced bad pixels
 thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for offset deduced bad pixels

 instrument = "SCS" # the instrument
 high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
-modules = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]  # modules to run for
 ```

 %% Cell type:code id: tags:

 ``` python
 # imports and things that do not usually need to be changed
 from datetime import datetime
 import warnings
 warnings.filterwarnings('ignore')
 from collections import OrderedDict
 import os
 import h5py
 import numpy as np
 import matplotlib
 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 %matplotlib inline

-from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path,
+from cal_tools.tools import (map_gain_stages, parse_runs, run_prop_seq_from_path,
                             get_notebook_name, get_dir_creation_date,
                             get_random_db_interface)
 from cal_tools.influx import InfluxLogger
 from cal_tools.enums import BadPixels
 from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview

 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 from ipyparallel import Client

 view = Client(profile=cluster_profile)[:]
 view.use_dill()

 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions


-# no need to change this
+h5path = h5path.format(karabo_id, receiver_id)
+h5path_idx = h5path_idx.format(karabo_id, receiver_id)

-QUADRANTS = 4
-MODULES_PER_QUAD = 4
-DET_FILE_INSET = "DSSC"
+
+if karabo_da[0] == -1:
+    if modules[0] == -1:
+        modules = list(range(16))
+    karabo_da = ["DSSC{:02d}".format(i) for i in modules]
+else:
+    modules = [int(x[-2:]) for x in karabo_da]

 max_cells = mem_cells

 offset_runs = OrderedDict()
-offset_runs["high"] = parse_runs(run)[0]
+offset_runs["high"] = run

 creation_time=None
 if not dont_use_dir_date:
    creation_time = get_dir_creation_date(in_folder, run)


 run, prop, seq = run_prop_seq_from_path(in_folder)
 logger = InfluxLogger(detector="DSSC", instrument=instrument, mem_cells=mem_cells,
                      notebook=get_notebook_name(), proposal=prop)

-print("Using {} as creation time of constant.".format(creation_time))
+print(f"Using {creation_time} as creation time of constant.")

-loc = None
-if instrument == "SCS":
-    loc = "SCS_DET_DSSC1M-1"
-    dinstance = "DSSC1M1"
+dinstance = "DSSC1M1"

-print("Detector in use is {}".format(loc))
+print(f"Detector in use is {karabo_id}")

 cal_db_interface = get_random_db_interface(cal_db_interface)
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Parameters are:")
-print("Proposal: {}".format(prop))
-print("Memory cells: {}/{}".format(mem_cells, max_cells))
+print(f"Proposal: {prop}")
+print(f"Memory cells: {mem_cells}/{max_cells}")
 print("Runs: {}".format([ v for v in offset_runs.values()]))
-print("Sequences: {}".format(sequences))
-print("Using DB: {}".format(db_output))
-print("Input: {}".format(in_folder))
-print("Output: {}".format(out_folder))
-print("Bias voltage: {}V".format(bias_voltage))
+print(f"Sequences: {sequences}")
+print(f"Using DB: {db_output}")
+print(f"Input: {in_folder}")
+print(f"Output: {out_folder}")
+print(f"Bias voltage: {bias_voltage}V")
 ```

 %% Cell type:markdown id: tags:

 The following lines will create a queue of files which will the be executed module-parallel. Distinguishing between different gains.

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
-if not os.path.exists(out_folder):
-    os.makedirs(out_folder)
-
-gmf = gain_map_files(in_folder, offset_runs, sequences, DET_FILE_INSET, QUADRANTS, MODULES_PER_QUAD)
+os.makedirs(out_folder, exist_ok=True)
+gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
 gain_mapped_files, total_sequences, total_file_size = gmf
-print("Will process at total of {} sequences: {:0.2f} GB of data.".format(total_sequences, total_file_size))
+print(f"Will process a total of {total_sequences} sequences.")
 ```

 %% Cell type:markdown id: tags:

 ## Calculate Offsets, Noise and Thresholds ##

 The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.

 %% Cell type:code id: tags:

 ``` python
 import copy
 from functools import partial
-def characterize_module(cells, bp_thresh, rawversion, loc, inp):
+def characterize_module(cells, bp_thresh, rawversion, karabo_id, h5path, h5path_idx, inp):
    import numpy as np
    import copy
    import h5py
    from cal_tools.enums import BadPixels

    from hashlib import blake2b
    import struct
    import binascii

-    def get_num_cells(fname, loc, module):
+    def get_num_cells(fname, h5path):
        with h5py.File(fname, "r") as f:

-            cells = f["INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, module)][()]
+            cells = f[f"{h5path}/cellId"][()]
            maxcell = np.max(cells)
            options = [100, 200, 400, 500, 600, 700, 800]
            dists = np.array([(o-maxcell) for o in options])
            dists[dists<0] = 10000 # assure to always go higher
            return options[np.argmin(dists)]

    filename, filename_out, channel = inp

+    h5path = h5path.format(channel)
+    h5path_idx = h5path_idx.format(channel)

    if cells == 0:
-        cells = get_num_cells(filename, loc, channel)
+        cells = get_num_cells(filename, h5path)

    pulseid_checksum = None

    thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh

    infile = h5py.File(filename, "r", driver="core")
    if rawversion == 2:
-        count = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        count = np.squeeze(infile[f"{h5path_idx}/count"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(first[count != 0][-1]+count[count != 0][-1])
        first_index = int(first[count != 0][0])
-        pulseids = infile["INSTRUMENT/{}/DET/{}CH0:xtdf/image/pulseId".format(loc, channel)][first_index:int(first[count != 0][1])]
+        pulseids = infile[f"{h5path}/pulseId"][first_index:int(first[count != 0][1])]
        bveto = blake2b(pulseids.data, digest_size=8)
        pulseid_checksum = struct.unpack('d', binascii.unhexlify(bveto.hexdigest()))[0]
    else:
-        status = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/status".format(loc, channel)])
+        status = np.squeeze(infile[f"{h5path_idx}/status"])
        if np.count_nonzero(status != 0) == 0:
            return
-        last = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/last".format(loc, channel)])
-        first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
+        last = np.squeeze(infile[f"{h5path_idx}/last"])
+        first = np.squeeze(infile[f"{h5path_idx}/first"])
        last_index = int(last[status != 0][-1]) + 1
        first_index = int(first[status != 0][0])
-    im = np.array(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data".format(loc, channel)][first_index:last_index,...])
-    cellIds = np.squeeze(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, channel)][first_index:last_index,...])
+    im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
+    cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])

    infile.close()


    im = im[:, 0, ...].astype(np.float32)

    im = np.rollaxis(im, 2)
    im = np.rollaxis(im, 2, 1)

    mcells = cells
    offset = np.zeros((im.shape[0], im.shape[1], mcells))
    noise = np.zeros((im.shape[0], im.shape[1], mcells))

    for cc in np.unique(cellIds[cellIds < mcells]):
        cellidx = cellIds == cc
        offset[...,cc] = np.median(im[..., cellidx], axis=2)
        noise[...,cc] = np.std(im[..., cellidx], axis=2)


    # bad pixels
    bp = np.zeros(offset.shape, np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[(offset < thresholds_offset_hard[0]) | (offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))

    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value


    return offset, noise, bp, cells, pulseid_checksum


 offset_g = OrderedDict()
 noise_g = OrderedDict()
 gain_g = OrderedDict()
 badpix_g = OrderedDict()
 gg = 0

 start = datetime.now()
 all_cells = []
 checksums = {}

 for gain, mapped_files in gain_mapped_files.items():

    inp = []
    dones = []
    for i in modules:
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = mapped_files[qm].get()
            dones.append(mapped_files[qm].empty())
        else:
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        inp.append((fname_in, fout, i))
    first = False
    p = partial(characterize_module, max_cells,
               (thresholds_offset_hard, thresholds_offset_sigma,
-                thresholds_noise_hard, thresholds_noise_sigma), rawversion, loc)
+                thresholds_noise_hard, thresholds_noise_sigma), rawversion, karabo_id, h5path, h5path_idx)
    results = list(map(p, inp))

    for ii, r in enumerate(results):
        i = modules[ii]
        offset, noise,  bp, thiscell, pulseid_checksum = r
        all_cells.append(thiscell)
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm not in offset_g:
            offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2]))
            noise_g[qm] = np.zeros_like(offset_g[qm])

            badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
            checksums[qm] = pulseid_checksum

        offset_g[qm][...] = offset
        noise_g[qm][...] = noise
        badpix_g[qm][...] = bp
    gg +=1

 duration = (datetime.now()-start).total_seconds()
 logger.runtime_summary_entry(success=True, runtime=duration,
                                   total_sequences=total_sequences,
                                   filesize=total_file_size)
 logger.send()
 max_cells = np.max(all_cells)
-print("Using {} memory cells".format(max_cells))
+print(f"Using {max_cells} memory cells")
 ```

 %% Cell type:code id: tags:

 ``` python
 res = OrderedDict()
 for i in modules:
    qm = "Q{}M{}".format(i//4+1, i%4+1)
    res[qm] = {'Offset': offset_g[qm],
               'Noise': noise_g[qm],
               'BadPixels': badpix_g[qm]
               }

 if local_output:
    for qm in offset_g.keys():
-        ofile = "{}/dssc_offset_store_{}_{}.h5".format(out_folder, "_".join(offset_runs.values()), qm)
+        ofile = "{}/dssc_offset_store_{}_{}.h5".format(out_folder, run, qm)
        store_file = h5py.File(ofile, "w")
        store_file["{}/Offset/0/data".format(qm)] = offset_g[qm]
        store_file["{}/Noise/0/data".format(qm)] = noise_g[qm]
        store_file["{}/BadPixels/0/data".format(qm)] = badpix_g[qm]
        store_file.close()
 ```

 %% Cell type:code id: tags:

 ``` python
 if db_output:
    for dont_use_pulseIds in [True, False]:
        for qm in offset_g.keys():
            try:
                metadata = ConstantMetaData()
                offset = Constants.DSSC.Offset()
                offset.data = offset_g[qm]
                metadata.calibration_constant = offset
                pidsum = None if dont_use_pulseIds else checksums[qm]
                # set the operating condition
                condition = Conditions.Dark.DSSC(memory_cells=max_cells, bias_voltage=bias_voltage,
                                                 pulseid_checksum=pidsum)
                detinst = getattr(Detectors, dinstance)

                device = getattr(detinst, qm)

                metadata.detector_condition = condition

                # specify the a version for this constant
                if creation_time is None:
                    metadata.calibration_constant_version = Versions.Now(device=device)
                else:
                    metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
-                metadata.send(cal_db_interface, timeout=3000000)
+                metadata.send(cal_db_interface, timeout=cal_db_timeout)


                metadata = ConstantMetaData()
                noise = Constants.DSSC.Noise()
                noise.data = noise_g[qm]
                metadata.calibration_constant = noise

                # set the operating condition
                condition = Conditions.Dark.DSSC(memory_cells=max_cells, bias_voltage=bias_voltage,
                                                 pulseid_checksum=pidsum)
                metadata.detector_condition = condition

                # specify the a version for this constant
                if creation_time is None:
                    metadata.calibration_constant_version = Versions.Now(device=device)
                else:
                    metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
-                metadata.send(cal_db_interface, timeout=3000000)
+                metadata.send(cal_db_interface, timeout=cal_db_timeout)

                continue  # no bad pixels yet
                metadata = ConstantMetaData()
                badpixels = Constants.DSSC.BadPixelsDark()
                badpixels.data = badpix_g[qm]
                metadata.calibration_constant = badpixels

                # set the operating condition
                condition = Conditions.Dark.DSSC(memory_cells=max_cells, bias_voltage=bias_voltage,
                                                 pulseid_checksum=pidsum)
                metadata.detector_condition = condition

                # specify the a version for this constant
                if creation_time is None:
                    metadata.calibration_constant_version = Versions.Now(device=device)
                else:
                    metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)
-                metadata.send(cal_db_interface, timeout=3000000)
+                metadata.send(cal_db_interface, timeout=cal_db_timeout)
            except Exception as e:
                print(e)
 ```

 %% Cell type:markdown id: tags:

 ## Single-Cell Overviews ##

 Single cell overviews allow to identify potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.

 %% Cell type:code id: tags:

 ``` python
 for r in res.values():
    del r["BadPixels"]
 ```

 %% Cell type:code id: tags:

 ``` python
 cell = 9
 gain = 0
 out_folder = None
-show_overview(res, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
+show_overview(res, cell, gain, out_folder=out_folder, infix="_{}".format(run))
 ```

 %% Cell type:markdown id: tags:

 ## Global Bad Pixel Behaviour ##

 The following plots show the results of bad pixel evaluation for all evaluated memory cells. Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2. This excludes single bad pixels present only in disconnected pixels. Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated. Colors encode the bad pixel type, or mixed type.

 %% Cell type:code id: tags:

 ``` python
 cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}

 rebin = 8 if not high_res_badpix_3d else 2

 gain = 0
 for mod, data in badpix_g.items():
    plot_badpix_3d(data, cols, title=mod, rebin_fac=rebin)
 ```

 %% Cell type:markdown id: tags:

 ## Aggregate values, and per Cell behaviour ##

 The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(offset_g, "Offset (ADU)", max_cells,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()), entries=1)
+                         out_folder=out_folder, infix="_{}".format(run), entries=1)
 ```

 %% Cell type:code id: tags:

 ``` python
 create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()), entries=1)
+                         out_folder=out_folder, infix="_{}".format(run), entries=1)
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixel_aggregate_g = OrderedDict()
 for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
 create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, entries=1,
-                         out_folder=out_folder, infix="_".join(offset_runs.values()))
+                         out_folder=out_folder, infix="_{}".format(run))
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

--- a/notebooks/DSSC/DSSC_Correct_and_Verify.ipynb
+++ b/notebooks/DSSC/DSSC_Correct_and_Verify.ipynb
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 0.1

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/SPB/201922/p002566/raw" # the folder to read data from, required
-run = 70 # runs to process, required
+cluster_profile = "noDB" # cluster profile to use
+in_folder = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw" # the folder to read data from, required
 out_folder =  "/gpfs/exfel/data/scratch/karnem/test/005"  # the folder to output to, required
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
+run = 9999 # runs to process, required
+
+karabo_id = "FXE_XAD_JF1M" # karabo prefix of Jungfrau devices
+karabo_da = 'JNGFR01' # data aggregators
+receiver_id = "RECEIVER-{}" # inset for receiver devices
+receiver_control_id = "CONTROL" # inset for control devices
+path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
+h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
+h5path_run = '/RUN/{}/DET/{}' # path to run data
+h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
+karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id
+karabo_da_control = "JNGFR01" # file inset for control data
+
+use_dir_creation_date = True # use the creation data of the input dir for database queries
+cal_db_interface = "tcp://max-exfl017:8016" #"tcp://max-exfl016:8015#8025" # the database interface to use
+cal_db_timeout = 180000 # timeout on caldb requests",
+
 overwrite = True # set to True if existing data should be overwritten
 no_relative_gain = False # do not do relative gain correction
-cluster_profile = "noDB" # cluster profile to use
 bias_voltage = 180 # will be overwritten by value in file
-cal_db_interface = "tcp://max-exfl016:8016" #"tcp://max-exfl016:8015#8025" # the database interface to use
-cal_db_timeout = 180000 # timeout on caldb requests",
-use_dir_creation_date = True # use the creation data of the input dir for database queries
 sequences_per_node = 5 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
 photon_energy = 9.2 # photon energy in keV
 chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.
-path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
-path_template_control = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
 integration_time = 4.96 # integration time in us, will be overwritten by value in file
-h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
 gmapfile = "" # variable is not used but left here for back compatibility
-karabo_id = "SPB_IRDA_JNGFR" # karabo prefix of Jungfrau devices
-karabo_id_control = ""  # if control is on a different ID, set to empty string for using the same as for data
-receiver_id = "MODULE_3" # inset for receiver devices
-control_id = "CONTROL" # inset for control devices
-db_module = "Jungfrau_M273" # ID of module in calibration database
-path_inset = "JNGFR03" # file inset for image data
-path_inset_control = "JNGFR01" # file inset for control data
+db_module = ["Jungfrau_M233"] # ID of module in calibration database
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 chunk_size = 0

-def balance_sequences(in_folder, run, sequences, sequences_per_node, path_inset):
+def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
-    return bs(in_folder, run, sequences, sequences_per_node, path_inset)
+    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 import matplotlib
 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 %matplotlib inline
 import numpy as np

 import XFELDetAna.xfelpyanatools as xana
 import XFELDetAna.xfelpycaltools as xcal

 from cal_tools.enums import BadPixels
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 from cal_tools.tools import (get_dir_creation_date, get_constant_from_db_and_time)
 from matplotlib.colors import LogNorm

 import os
 import h5py
 from h5py import File as h5file
 import copy
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate

 from XFELDetAna.util import env
 env.iprofile = cluster_profile

 from functools import partial
 from ipyparallel import Client
 ```

 %% Cell type:code id: tags:

 ``` python
 client = Client(profile=cluster_profile)
 view = client[:]
 view.use_dill()

+receiver_id = receiver_id.format(int(karabo_da[-2:]))
 h5path = h5path.format(karabo_id, receiver_id)
 ped_dir = "{}/r{:04d}".format(in_folder, run)

+# TODO
+# this trick is needed until proper mapping is introduced
+if len(db_module)>1:
+    db_module = db_module[int(karabo_da[-2:])-1]
+else:
+    db_module = db_module[0]
+
 if ped_dir[-1] == "/":
    ped_dir = ped_dir[:-1]

 if not os.path.exists(out_folder):
    os.makedirs(out_folder)
 elif not overwrite:
    raise AttributeError("Output path exists! Exiting")


-fp_name = path_template.format(run, path_inset)
+fp_name = path_template.format(run, karabo_da)
 fp_path = '{}/{}'.format(ped_dir, fp_name)

-fp_name_contr = path_template_control.format(run, path_inset_control)
+fp_name_contr = path_template.format(run, karabo_da_control)
 fp_path_contr = '{}/{}'.format(ped_dir, fp_name_contr)


 if sequences[0] == -1:
    sequences = None

 print("Reading data from {}".format(fp_path))
 print("Run is: {}".format(run))
 print("HDF5 path: {}".format(h5path))

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print("Using {} as creation time".format(creation_time))

 if karabo_id_control == "":
    karabo_id_control = karabo_id

 ```

 %% Cell type:code id: tags:

 ``` python
 def check_memoryCells(file_name, path):
    with h5file(file_name, 'r') as f:
        t_stamp = np.array(f[path + '/storageCells/timestamp'])
        st_cells = np.array(f[path + '/storageCells/value'])
        sc_start = np.array(f[path + '/storageCellStart/value'])

    valid_train = t_stamp > 0
    n_scs = st_cells[valid_train][0] + 1
    sc_s = sc_start[valid_train][0]

    return n_scs, sc_s
 ```

 %% Cell type:code id: tags:

 ``` python
 if not manual_slow_data:
    with h5py.File(fp_path_contr.format(0), 'r') as f:
-        integration_time = float(f['/RUN/{}/DET/{}/exposureTime/value'.format(karabo_id_control, control_id)][()]*1e6)
-        bias_voltage = int(np.squeeze(f['/RUN/{}/DET/{}/vHighVoltage/value'.format(karabo_id_control, control_id)])[0])
+        run_path = h5path_run.format(karabo_id_control, receiver_control_id)
+        integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
+        bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])


-control_path = '/CONTROL/{}/DET/{}'.format(karabo_id_control, control_id)
+control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)
 this_run_mcells, sc_start = check_memoryCells(fp_path_contr.format(0), control_path)
 if this_run_mcells == 1:
    memoryCells = 1
    print('Dark runs in single cell mode\n storage cell start: {:02d}'.format(sc_start))
 else:
    memoryCells = 16
    print('Dark runs in burst mode\n storage cell start: {:02d}'.format(sc_start))

 print("Integration time is {} us".format(integration_time))
 print("Bias voltage is {} V".format(bias_voltage))
 print("Number of memory cells is {}".format(memoryCells))
 ```

 %% Cell type:code id: tags:

 ``` python
 dirlist = sorted(os.listdir(ped_dir))
 file_list = []
 total_sequences = 0
 fsequences = []
 for entry in dirlist:
    #only h5 file
    abs_entry = "{}/{}".format(ped_dir, entry)
    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":

        if sequences is None:
            for seq in range(len(dirlist)):

-                if path_template.format(run, path_inset).format(seq) in abs_entry:
+                if path_template.format(run, karabo_da).format(seq) in abs_entry:
                    file_list.append(abs_entry)
                    total_sequences += 1
                    fsequences.append(seq)
        else:
            for seq in sequences:
-                if path_template.format(run, path_inset).format(seq) in abs_entry:
+                if path_template.format(run, karabo_da).format(seq) in abs_entry:
                    file_list.append(os.path.abspath(abs_entry))
                    total_sequences += 1
                    fsequences.append(seq)
 sequences = fsequences
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Processing a total of {} sequence files".format(total_sequences))
 table = []


 for k, f in enumerate(file_list):
    table.append((k, f))
 if len(table) > 0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "file"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 condition = Conditions.Dark.jungfrau(memory_cells=memoryCells,
                                     bias_voltage=bias_voltage,
                                     integration_time=integration_time)

 ## offset
 offset_map, _ = \
            get_constant_from_db_and_time(getattr(Detectors, db_module),
                                          Constants.jungfrau.Offset(),
                                          condition,
                                          np.zeros((1024, 512, 1, 3)),
                                          cal_db_interface,
                                          creation_time=creation_time,
                                          timeout=cal_db_timeout)

 mask, _ = \
            get_constant_from_db_and_time(getattr(Detectors, db_module),
                                          Constants.jungfrau.BadPixelsDark(),
                                          condition,
                                          np.zeros((1024, 512, 1, 3)),
                                          cal_db_interface,
                                          creation_time=creation_time,
                                          timeout=cal_db_timeout)

 gain_map, _ = \
            get_constant_from_db_and_time(getattr(Detectors, db_module),
                                          Constants.jungfrau.RelativeGain(),
                                          condition,
                                          None,
                                          cal_db_interface,
                                          creation_time=creation_time,
                                          timeout=cal_db_timeout)

 # move from x,y,cell,gain to cell,x,y,gain
 offset_map = np.squeeze(offset_map)
 mask = np.squeeze(mask)
 if memoryCells>1:
    offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)
    mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)

 if gain_map is None:
    print("No gain map found")
    no_relative_gain = True
 else:
    if memoryCells>1:
        gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)
    else:
        gain_map = np.squeeze(gain_map)
        gain_map = np.moveaxis(gain_map, 1, 0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def copy_and_sanitize_non_cal_data(infile, outfile, h5base):
    """ Copy and sanitize data in `infile` that is not touched by `correctLPD`
    """

    if h5base.startswith("/"):
        h5base = h5base[1:]
    dont_copy = ["adc",]
    dont_copy = [h5base+"/{}".format(do)
                for do in dont_copy]

    def visitor(k, item):
        if k not in dont_copy:
            if isinstance(item, h5py.Group):
                outfile.create_group(k)
            elif isinstance(item, h5py.Dataset):
                group = str(k).split("/")
                group = "/".join(group[:-1])
                infile.copy(k, outfile[group])

    infile.visititems(visitor)

 ```

 %% Cell type:code id: tags:

 ``` python
 # Loop over sequences
 for k, f in enumerate(file_list):
    with h5py.File(f, 'r') as infile:
        out_file = "{}/{}".format(out_folder, f.split("/")[-1])
        out_file = out_file.replace("RAW", "CORR")
        print('Process file: ', f)
        with h5py.File(out_file, "w") as ofile:
            copy_and_sanitize_non_cal_data(infile, ofile, h5path)

            oshape = infile[h5path+"/adc"].shape
            print('Data shape: ', oshape)
            ddset = ofile.create_dataset(h5path+"/adc",
                                         oshape,
                                         chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
                                         dtype=np.float32)

            mskset = ofile.create_dataset(h5path+"/mask",
                                          oshape,
                                          chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
                                          dtype=np.uint32,
                                          compression="gzip", compression_opts=1, shuffle=True)


            # Correct a chunk of images for offset and gain
            def correct_chunk(offset_map, mask, gain_map, memoryCells, no_relative_gain, inp):
                import numpy as np
                import copy
                import h5py

                fim_data = None
                gim_data = None
                rim_data = None
                msk_data = None
                err = ''

                try:
                    d, g, m, ind, copy_sample = inp
                    g[g==3] = 2

                    if copy_sample and ind==0:
                        if memoryCells==1:
                            rim_data = np.squeeze(copy.copy(d))
                        else:
                            rim_data = np.squeeze(copy.copy(d[:,0,...]))

                    # Select memory cells
                    if memoryCells>1:
                        m[m>16] = 0
                        offset_map_cell = offset_map[m,...]
                        mask_cell = mask[m,...]
                    else:
                        offset_map_cell = offset_map
                        mask_cell = mask

                    # Offset correction
                    offset = np.choose(g, (offset_map_cell[...,0], offset_map_cell[...,1], offset_map_cell[...,2]))
                    d -= offset

                    # Gain correction
                    if not no_relative_gain:
                        if memoryCells>1:
                            gain_map_cell = gain_map[m,...]
                        else:
                            gain_map_cell = gain_map
                        cal = np.choose(g, (gain_map_cell[..., 0], gain_map_cell[..., 1], gain_map_cell[..., 2]))
                        d /= cal

                    msk = np.choose(g, (mask_cell[...,0], mask_cell[...,1], mask_cell[...,2]))

                    # Store sample of data for plotting
                    if copy_sample and ind==0:
                        if memoryCells==1:
                            fim_data = np.squeeze(copy.copy(d))
                            gim_data = np.squeeze(copy.copy(g))
                            msk_data = np.squeeze(copy.copy(msk))
                        else:
                            fim_data = np.squeeze(copy.copy(d[:,1,...]))
                            gim_data = np.squeeze(copy.copy(g[:,1,...]))
                            msk_data = np.squeeze(copy.copy(msk[:,1,...]))

                    print('All done')

                except Exception as e:
                    print(e)
                    err = e

                return ind, d, msk, rim_data, fim_data, gim_data, msk_data, err

            # Run ip Cluster parallelization over chunks of images
            inp = []
            max_ind = oshape[0]
            ind = 0

            # If chunk size is not given maximum 12+1 chunks is expected
            if chunk_size == 0:
                chunk_size = max_ind//12
                print('Chunk size: {}'.format(chunk_size))

            while ind<max_ind:
                d = infile[h5path+"/adc"][ind:ind+chunk_size,...].astype(np.float32)
                g = infile[h5path+"/gain"][ind:ind+chunk_size,...]
                m = infile[h5path+"/memoryCell"][ind:ind+chunk_size,...]
                print('To process: ', d.shape)
                inp.append((d,g,m, ind, k==0))
                ind += chunk_size

            print('Run {} processes'.format(len(inp)) )
            p = partial(correct_chunk, offset_map, mask, gain_map, memoryCells, no_relative_gain)

            r = view.map_sync(p, inp)
            #r = list(map(p, inp))
            if k==0:
                _,_,_, rim_data, fim_data, gim_data, msk_data, _ = r[0]

            for rr in r:
                ind, cdata, cmask, _,_,_,_, err = rr
                data_size = cdata.shape[0]
                ddset[ind:ind+data_size,...] = cdata
                mskset[ind:ind+data_size,...] = cmask
                if err != '':
                    print('Error: '.format(err))
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis):
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")

 ```

 %% Cell type:code id: tags:

 ``` python
 h, ex, ey = np.histogram2d(rim_data.flatten(), gim_data.flatten(),
                                                           bins=[100, 4], range=[[0, 10000], [0,4]])
 do_2d_plot(h, (ex, ey), "Signal (ADU)", "Gain Bit Value")
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview ###

 The per pixel mean of the sequence file of RAW data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(rim_data,axis=0),
               vmin=min(0.75*np.median(rim_data[rim_data > 0]), 2000),
               vmax=max(1.5*np.median(rim_data[rim_data > 0]), 16000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview ###

 The per pixel mean of the sequence file of CORR data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(fim_data,axis=0),
               vmin=min(0.75*np.median(fim_data[fim_data > 0]), -0.5),
               vmax=max(2.*np.median(fim_data[fim_data > 0]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Single Train Preview ###

 A single image from the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(fim_data[0,...],
               vmin=min(0.75*np.median(fim_data[0,...]), -0.5),
               vmax=max(2.*np.median(fim_data[0,...]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(211)
 h = ax.hist(fim_data.flatten(), bins=1000, range=(-100, 1000), log=True)
 l = ax.set_xlabel("Signal (keV)")
 l = ax.set_ylabel("Counts")

 ax = fig.add_subplot(212)
 h = ax.hist(fim_data.flatten(), bins=1000, range=(-1000, 10000), log=True)
 l = ax.set_xlabel("Signal (keV)")
 l = ax.set_ylabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview ###

 The per pixel maximum of the first train of the GAIN data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max(gim_data, axis=0), vmin=0,
               vmax=3, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(msk_data[0,...]), vmin=0, vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 0.1

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/SPB/201922/p002566/raw" # the folder to read data from, required
-run = 70 # runs to process, required
+cluster_profile = "noDB" # cluster profile to use
+in_folder = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw" # the folder to read data from, required
 out_folder =  "/gpfs/exfel/data/scratch/karnem/test/005"  # the folder to output to, required
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
+run = 9999 # runs to process, required
+
+karabo_id = "FXE_XAD_JF1M" # karabo prefix of Jungfrau devices
+karabo_da = 'JNGFR01' # data aggregators
+receiver_id = "RECEIVER-{}" # inset for receiver devices
+receiver_control_id = "CONTROL" # inset for control devices
+path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
+h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
+h5path_run = '/RUN/{}/DET/{}' # path to run data
+h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
+karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id
+karabo_da_control = "JNGFR01" # file inset for control data
+
+use_dir_creation_date = True # use the creation data of the input dir for database queries
+cal_db_interface = "tcp://max-exfl017:8016" #"tcp://max-exfl016:8015#8025" # the database interface to use
+cal_db_timeout = 180000 # timeout on caldb requests",
+
 overwrite = True # set to True if existing data should be overwritten
 no_relative_gain = False # do not do relative gain correction
-cluster_profile = "noDB" # cluster profile to use
 bias_voltage = 180 # will be overwritten by value in file
-cal_db_interface = "tcp://max-exfl016:8016" #"tcp://max-exfl016:8015#8025" # the database interface to use
-cal_db_timeout = 180000 # timeout on caldb requests",
-use_dir_creation_date = True # use the creation data of the input dir for database queries
 sequences_per_node = 5 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
 photon_energy = 9.2 # photon energy in keV
 chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.
-path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
-path_template_control = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
 integration_time = 4.96 # integration time in us, will be overwritten by value in file
-h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
 gmapfile = "" # variable is not used but left here for back compatibility
-karabo_id = "SPB_IRDA_JNGFR" # karabo prefix of Jungfrau devices
-karabo_id_control = ""  # if control is on a different ID, set to empty string for using the same as for data
-receiver_id = "MODULE_3" # inset for receiver devices
-control_id = "CONTROL" # inset for control devices
-db_module = "Jungfrau_M273" # ID of module in calibration database
-path_inset = "JNGFR03" # file inset for image data
-path_inset_control = "JNGFR01" # file inset for control data
+db_module = ["Jungfrau_M233"] # ID of module in calibration database
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 chunk_size = 0

-def balance_sequences(in_folder, run, sequences, sequences_per_node, path_inset):
+def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
-    return bs(in_folder, run, sequences, sequences_per_node, path_inset)
+    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 import matplotlib
 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 %matplotlib inline
 import numpy as np

 import XFELDetAna.xfelpyanatools as xana
 import XFELDetAna.xfelpycaltools as xcal

 from cal_tools.enums import BadPixels
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 from cal_tools.tools import (get_dir_creation_date, get_constant_from_db_and_time)
 from matplotlib.colors import LogNorm

 import os
 import h5py
 from h5py import File as h5file
 import copy
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate

 from XFELDetAna.util import env
 env.iprofile = cluster_profile

 from functools import partial
 from ipyparallel import Client
 ```

 %% Cell type:code id: tags:

 ``` python
 client = Client(profile=cluster_profile)
 view = client[:]
 view.use_dill()

+receiver_id = receiver_id.format(int(karabo_da[-2:]))
 h5path = h5path.format(karabo_id, receiver_id)
 ped_dir = "{}/r{:04d}".format(in_folder, run)

+# TODO
+# this trick is needed until proper mapping is introduced
+if len(db_module)>1:
+    db_module = db_module[int(karabo_da[-2:])-1]
+else:
+    db_module = db_module[0]
+
 if ped_dir[-1] == "/":
    ped_dir = ped_dir[:-1]

 if not os.path.exists(out_folder):
    os.makedirs(out_folder)
 elif not overwrite:
    raise AttributeError("Output path exists! Exiting")


-fp_name = path_template.format(run, path_inset)
+fp_name = path_template.format(run, karabo_da)
 fp_path = '{}/{}'.format(ped_dir, fp_name)

-fp_name_contr = path_template_control.format(run, path_inset_control)
+fp_name_contr = path_template.format(run, karabo_da_control)
 fp_path_contr = '{}/{}'.format(ped_dir, fp_name_contr)


 if sequences[0] == -1:
    sequences = None

 print("Reading data from {}".format(fp_path))
 print("Run is: {}".format(run))
 print("HDF5 path: {}".format(h5path))

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print("Using {} as creation time".format(creation_time))

 if karabo_id_control == "":
    karabo_id_control = karabo_id

 ```

 %% Cell type:code id: tags:

 ``` python
 def check_memoryCells(file_name, path):
    with h5file(file_name, 'r') as f:
        t_stamp = np.array(f[path + '/storageCells/timestamp'])
        st_cells = np.array(f[path + '/storageCells/value'])
        sc_start = np.array(f[path + '/storageCellStart/value'])

    valid_train = t_stamp > 0
    n_scs = st_cells[valid_train][0] + 1
    sc_s = sc_start[valid_train][0]

    return n_scs, sc_s
 ```

 %% Cell type:code id: tags:

 ``` python
 if not manual_slow_data:
    with h5py.File(fp_path_contr.format(0), 'r') as f:
-        integration_time = float(f['/RUN/{}/DET/{}/exposureTime/value'.format(karabo_id_control, control_id)][()]*1e6)
-        bias_voltage = int(np.squeeze(f['/RUN/{}/DET/{}/vHighVoltage/value'.format(karabo_id_control, control_id)])[0])
+        run_path = h5path_run.format(karabo_id_control, receiver_control_id)
+        integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
+        bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])


-control_path = '/CONTROL/{}/DET/{}'.format(karabo_id_control, control_id)
+control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)
 this_run_mcells, sc_start = check_memoryCells(fp_path_contr.format(0), control_path)
 if this_run_mcells == 1:
    memoryCells = 1
    print('Dark runs in single cell mode\n storage cell start: {:02d}'.format(sc_start))
 else:
    memoryCells = 16
    print('Dark runs in burst mode\n storage cell start: {:02d}'.format(sc_start))

 print("Integration time is {} us".format(integration_time))
 print("Bias voltage is {} V".format(bias_voltage))
 print("Number of memory cells is {}".format(memoryCells))
 ```

 %% Cell type:code id: tags:

 ``` python
 dirlist = sorted(os.listdir(ped_dir))
 file_list = []
 total_sequences = 0
 fsequences = []
 for entry in dirlist:
    #only h5 file
    abs_entry = "{}/{}".format(ped_dir, entry)
    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":

        if sequences is None:
            for seq in range(len(dirlist)):

-                if path_template.format(run, path_inset).format(seq) in abs_entry:
+                if path_template.format(run, karabo_da).format(seq) in abs_entry:
                    file_list.append(abs_entry)
                    total_sequences += 1
                    fsequences.append(seq)
        else:
            for seq in sequences:
-                if path_template.format(run, path_inset).format(seq) in abs_entry:
+                if path_template.format(run, karabo_da).format(seq) in abs_entry:
                    file_list.append(os.path.abspath(abs_entry))
                    total_sequences += 1
                    fsequences.append(seq)
 sequences = fsequences
 ```

 %% Cell type:code id: tags:

 ``` python
 print("Processing a total of {} sequence files".format(total_sequences))
 table = []


 for k, f in enumerate(file_list):
    table.append((k, f))
 if len(table) > 0:
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "file"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 condition = Conditions.Dark.jungfrau(memory_cells=memoryCells,
                                     bias_voltage=bias_voltage,
                                     integration_time=integration_time)

 ## offset
 offset_map, _ = \
            get_constant_from_db_and_time(getattr(Detectors, db_module),
                                          Constants.jungfrau.Offset(),
                                          condition,
                                          np.zeros((1024, 512, 1, 3)),
                                          cal_db_interface,
                                          creation_time=creation_time,
                                          timeout=cal_db_timeout)

 mask, _ = \
            get_constant_from_db_and_time(getattr(Detectors, db_module),
                                          Constants.jungfrau.BadPixelsDark(),
                                          condition,
                                          np.zeros((1024, 512, 1, 3)),
                                          cal_db_interface,
                                          creation_time=creation_time,
                                          timeout=cal_db_timeout)

 gain_map, _ = \
            get_constant_from_db_and_time(getattr(Detectors, db_module),
                                          Constants.jungfrau.RelativeGain(),
                                          condition,
                                          None,
                                          cal_db_interface,
                                          creation_time=creation_time,
                                          timeout=cal_db_timeout)

 # move from x,y,cell,gain to cell,x,y,gain
 offset_map = np.squeeze(offset_map)
 mask = np.squeeze(mask)
 if memoryCells>1:
    offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)
    mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)

 if gain_map is None:
    print("No gain map found")
    no_relative_gain = True
 else:
    if memoryCells>1:
        gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)
    else:
        gain_map = np.squeeze(gain_map)
        gain_map = np.moveaxis(gain_map, 1, 0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def copy_and_sanitize_non_cal_data(infile, outfile, h5base):
    """ Copy and sanitize data in `infile` that is not touched by `correctLPD`
    """

    if h5base.startswith("/"):
        h5base = h5base[1:]
    dont_copy = ["adc",]
    dont_copy = [h5base+"/{}".format(do)
                for do in dont_copy]

    def visitor(k, item):
        if k not in dont_copy:
            if isinstance(item, h5py.Group):
                outfile.create_group(k)
            elif isinstance(item, h5py.Dataset):
                group = str(k).split("/")
                group = "/".join(group[:-1])
                infile.copy(k, outfile[group])

    infile.visititems(visitor)

 ```

 %% Cell type:code id: tags:

 ``` python
 # Loop over sequences
 for k, f in enumerate(file_list):
    with h5py.File(f, 'r') as infile:
        out_file = "{}/{}".format(out_folder, f.split("/")[-1])
        out_file = out_file.replace("RAW", "CORR")
        print('Process file: ', f)
        with h5py.File(out_file, "w") as ofile:
            copy_and_sanitize_non_cal_data(infile, ofile, h5path)

            oshape = infile[h5path+"/adc"].shape
            print('Data shape: ', oshape)
            ddset = ofile.create_dataset(h5path+"/adc",
                                         oshape,
                                         chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
                                         dtype=np.float32)

            mskset = ofile.create_dataset(h5path+"/mask",
                                          oshape,
                                          chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
                                          dtype=np.uint32,
                                          compression="gzip", compression_opts=1, shuffle=True)


            # Correct a chunk of images for offset and gain
            def correct_chunk(offset_map, mask, gain_map, memoryCells, no_relative_gain, inp):
                import numpy as np
                import copy
                import h5py

                fim_data = None
                gim_data = None
                rim_data = None
                msk_data = None
                err = ''

                try:
                    d, g, m, ind, copy_sample = inp
                    g[g==3] = 2

                    if copy_sample and ind==0:
                        if memoryCells==1:
                            rim_data = np.squeeze(copy.copy(d))
                        else:
                            rim_data = np.squeeze(copy.copy(d[:,0,...]))

                    # Select memory cells
                    if memoryCells>1:
                        m[m>16] = 0
                        offset_map_cell = offset_map[m,...]
                        mask_cell = mask[m,...]
                    else:
                        offset_map_cell = offset_map
                        mask_cell = mask

                    # Offset correction
                    offset = np.choose(g, (offset_map_cell[...,0], offset_map_cell[...,1], offset_map_cell[...,2]))
                    d -= offset

                    # Gain correction
                    if not no_relative_gain:
                        if memoryCells>1:
                            gain_map_cell = gain_map[m,...]
                        else:
                            gain_map_cell = gain_map
                        cal = np.choose(g, (gain_map_cell[..., 0], gain_map_cell[..., 1], gain_map_cell[..., 2]))
                        d /= cal

                    msk = np.choose(g, (mask_cell[...,0], mask_cell[...,1], mask_cell[...,2]))

                    # Store sample of data for plotting
                    if copy_sample and ind==0:
                        if memoryCells==1:
                            fim_data = np.squeeze(copy.copy(d))
                            gim_data = np.squeeze(copy.copy(g))
                            msk_data = np.squeeze(copy.copy(msk))
                        else:
                            fim_data = np.squeeze(copy.copy(d[:,1,...]))
                            gim_data = np.squeeze(copy.copy(g[:,1,...]))
                            msk_data = np.squeeze(copy.copy(msk[:,1,...]))

                    print('All done')

                except Exception as e:
                    print(e)
                    err = e

                return ind, d, msk, rim_data, fim_data, gim_data, msk_data, err

            # Run ip Cluster parallelization over chunks of images
            inp = []
            max_ind = oshape[0]
            ind = 0

            # If chunk size is not given maximum 12+1 chunks is expected
            if chunk_size == 0:
                chunk_size = max_ind//12
                print('Chunk size: {}'.format(chunk_size))

            while ind<max_ind:
                d = infile[h5path+"/adc"][ind:ind+chunk_size,...].astype(np.float32)
                g = infile[h5path+"/gain"][ind:ind+chunk_size,...]
                m = infile[h5path+"/memoryCell"][ind:ind+chunk_size,...]
                print('To process: ', d.shape)
                inp.append((d,g,m, ind, k==0))
                ind += chunk_size

            print('Run {} processes'.format(len(inp)) )
            p = partial(correct_chunk, offset_map, mask, gain_map, memoryCells, no_relative_gain)

            r = view.map_sync(p, inp)
            #r = list(map(p, inp))
            if k==0:
                _,_,_, rim_data, fim_data, gim_data, msk_data, _ = r[0]

            for rr in r:
                ind, cdata, cmask, _,_,_,_, err = rr
                data_size = cdata.shape[0]
                ddset[ind:ind+data_size,...] = cdata
                mskset[ind:ind+data_size,...] = cmask
                if err != '':
                    print('Error: '.format(err))
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis):
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")

 ```

 %% Cell type:code id: tags:

 ``` python
 h, ex, ey = np.histogram2d(rim_data.flatten(), gim_data.flatten(),
                                                           bins=[100, 4], range=[[0, 10000], [0,4]])
 do_2d_plot(h, (ex, ey), "Signal (ADU)", "Gain Bit Value")
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview ###

 The per pixel mean of the sequence file of RAW data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(rim_data,axis=0),
               vmin=min(0.75*np.median(rim_data[rim_data > 0]), 2000),
               vmax=max(1.5*np.median(rim_data[rim_data > 0]), 16000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview ###

 The per pixel mean of the sequence file of CORR data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(fim_data,axis=0),
               vmin=min(0.75*np.median(fim_data[fim_data > 0]), -0.5),
               vmax=max(2.*np.median(fim_data[fim_data > 0]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Single Train Preview ###

 A single image from the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(fim_data[0,...],
               vmin=min(0.75*np.median(fim_data[0,...]), -0.5),
               vmax=max(2.*np.median(fim_data[0,...]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(211)
 h = ax.hist(fim_data.flatten(), bins=1000, range=(-100, 1000), log=True)
 l = ax.set_xlabel("Signal (keV)")
 l = ax.set_ylabel("Counts")

 ax = fig.add_subplot(212)
 h = ax.hist(fim_data.flatten(), bins=1000, range=(-1000, 10000), log=True)
 l = ax.set_xlabel("Signal (keV)")
 l = ax.set_ylabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview ###

 The per pixel maximum of the first train of the GAIN data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max(gim_data, axis=0), vmin=0,
               vmax=3, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(msk_data[0,...]), vmin=0, vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

--- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Jungfrau Dark Image Characterization #

 Version: 0.1, Author: M. Ramilli, S. Hauf

 Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps

 %% Cell type:code id: tags:

 ``` python
-in_folder = '/gpfs/exfel/exp/FXE/201931/p900088/raw/'  # folder under which runs are located, required
+cluster_profile = 'noDB'  # the ipcluster profile name
+in_folder = '/gpfs/exfel/exp/FXE/201931/p900089/raw/'  # folder under which runs are located, required
 out_folder = '/gpfs/exfel/data/scratch/karnem/test_dark/' # path to place reports at, required
 sequences = 1  # number of sequence files in that run
+run_high = 86 # run number for G0 dark run, required
+run_med = 87 # run number for G1 dark run, required
+run_low = 88 # run number for G2 dark run, required
+
+karabo_da = ['JNGFR01'] # list of data aggregators, which corresponds to different JF modules
+karabo_id = "FXE_XAD_JF1M"  # bla karabo prefix of Jungfrau devices
+karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id
+receiver_id = 'RECEIVER-{}' # inset for receiver devices
+receiver_control_id = "CONTROL" # inset for control devices
 path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
-path_inset = "JNGFR02"  # file inset for image data
-path_inset_control = "JNGFR02" # file inset for control data
-cluster_profile = 'noDB'  # the ipcluster profile name
+h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
+h5path_run = '/RUN/{}/DET/{}' # path to run data
+h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
+karabo_da_control = "JNGFR01" # file inset for control data
+
+use_dir_creation_date = True # use dir creation date
 cal_db_interface = 'tcp://max-exfl016:8016'  # calibrate db interface to connect to
+cal_db_timeout = 300000 # timeout on caldb requests
+local_output = True # output constants locally
+db_output = False # output constants to database
+
 integration_time = 1000 # integration time in us, will be overwritten by value in file
 bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file
 badpixel_threshold_sigma = 20.  # bad pixels defined by values outside n times this std from median
 offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values
 offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values
 chunkSize = 10  # iteration chunk size, needs to match or be less than number of images in a sequence file
 imageRange = [0, 500]  # image range in which to evaluate
 memoryCells = 16  # number of memory cells
-h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
-run_high = 144 # run number for G0 dark run, required
-run_med = 145 # run number for G1 dark run, required
-run_low = 146 # run number for G2 dark run, required
-karabo_id = "FXE_XAD_JF500K"  # karabo prefix of Jungfrau devices
-karabo_id_control = ""  # if control is on a different ID, set to empty string for using the same as for data
-receiver_id = "RECEIVER" # inset for receiver devices
-control_id = "CONTROL" # inset for control devices
-db_module = "Jungfrau_M260" # ID of module in calibration database
-use_dir_creation_date = True # use dir creation date
+db_module = ["Jungfrau_M260"] # ID of module in calibration database
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 time_limits = 0.025 #  to find calibration constants later on, the integration time is allowed to vary by 0.5 us
-local_output = True # output constants locally
-db_output = False # output constants to database
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 import matplotlib
 matplotlib.use('agg')
 import numpy as np
 import h5py
 from h5py import File as h5file

 from cal_tools.enums import BadPixels
 from cal_tools.tools import get_dir_creation_date, get_random_db_interface
 from cal_tools.ana_tools import save_dict_to_hdf5
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 from XFELDetAna.util import env
 env.iprofile = cluster_profile

 from XFELDetAna.detectors.jungfrau import readerPSI as jfreaderPSI
 from XFELDetAna.detectors.jungfrau import reader as jfreader
 from XFELDetAna.detectors.jungfrau.jf_chunk_reader import JFChunkReader
 from XFELDetAna.detectors.jungfrau.util import non_empty_trains, count_n_files, rollout_data, sanitize_data_cellid
 import glob
 import matplotlib.pyplot as plt
 %matplotlib inline

 from XFELDetAna.plotting.histogram import histPlot
 from XFELDetAna.plotting.heatmap import heatmapPlot

+import os
+os.makedirs(out_folder, exist_ok=True)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+path_inset = karabo_da[0] # karabo_da is a concurrency parameter
+receiver_id = receiver_id.format(int(path_inset[-2:]))
+proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
+file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_high, run_med, run_low)
+
+# TODO
+# this trick is needed until proper mapping is introduced
+if len(db_module)>1:
+    db_module = db_module[int(path_inset[-2:])-1]
+else:
+    db_module = db_module[0]
+
 # Constants relevant for the analysis
 run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
 sensorSize = [1024, 512]
 blockSize = [1024, 512]
 xRange = [0, 0+sensorSize[0]]
 yRange = [0, 0+sensorSize[1]]
 gains = [0, 1, 2]
 h5path = h5path.format(karabo_id, receiver_id)

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
    print("Using {} as creation time".format(creation_time))

 cal_db_interface = get_random_db_interface(cal_db_interface)
 print('Calibration database interface: {}'.format(cal_db_interface))

 offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]

 if karabo_id_control == "":
    karabo_id_control = karabo_id

-import os
-os.makedirs(out_folder, exist_ok=True)
-```
-
-%% Cell type:code id: tags:
-
-``` python
-proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
-file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_high, run_med, run_low)
+print('Path inset ', path_inset)
+print('Receiver Id ', receiver_id)
 ```

 %% Cell type:code id: tags:

 ``` python
 def check_memoryCells(file_name, path):
    with h5file(file_name, 'r') as f:
        t_stamp = np.array(f[path + '/storageCells/timestamp'])
        st_cells = np.array(f[path + '/storageCells/value'])
        sc_start = np.array(f[path + '/storageCellStart/value'])

    valid_train = t_stamp > 0
    n_scs = st_cells[valid_train][0] + 1
    sc_s = sc_start[valid_train][0]

    return n_scs, sc_s
 ```

 %% Cell type:code id: tags:

 ``` python
 chunkSize = 100
 filep_size = 1000
 noiseCal = None
 noise_map = None
 offset_map = None
 memoryCells = None
 for i, r_n in enumerate(run_nums):

    gain = i
    print(f"Gain stage {gain}, run {r_n}")
    valid_data = []
    valid_cellids = []
    if r_n is not None:
        n_tr = 0
        n_empty_trains = 0
        n_empty_sc = 0

        ped_dir = "{}/r{:04d}/".format(in_folder, r_n)
-        fp_name = path_template.format(r_n, path_inset_control)
+        fp_name = path_template.format(r_n, karabo_da_control)
        fp_path = '{}/{}'.format(ped_dir, fp_name)

        n_files = len(glob.glob("{}/*{}*.h5".format(ped_dir, path_inset)))
        myRange = range(0, n_files)
-        control_path = '/CONTROL/{}/DET/{}'.format(karabo_id_control, control_id)
+        control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)

        this_run_mcells, sc_start = check_memoryCells(fp_path.format(0).format(myRange[0]), control_path)

        if noise_map is None:
            if not manual_slow_data:
                with h5py.File(fp_path.format(0), 'r') as f:
-                    integration_time = float(f['/RUN/{}/DET/{}/exposureTime/value'.format(karabo_id_control, control_id)][()]*1e6)
-                    bias_voltage = int(np.squeeze(f['/RUN/{}/DET/{}/vHighVoltage/value'.format(karabo_id_control, control_id)])[0])
+                    run_path = h5path_run.format(karabo_id_control, receiver_control_id)
+                    integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
+                    bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])
            print("Integration time is {} us".format(integration_time))
            print("Bias voltage is {} V".format(bias_voltage))
            if this_run_mcells == 1:
                memoryCells = 1
                print('Dark runs in single cell mode\n storage cell start: {:02d}'.format(sc_start))
            else:
                memoryCells = 16
                print('Dark runs in burst mode\n storage cell start: {:02d}'.format(sc_start))

            noise_map = np.zeros(sensorSize+[memoryCells, 3])
            offset_map = np.zeros(sensorSize+[memoryCells, 3])

        fp_name = path_template.format(r_n, path_inset)
        fp_path = '{}/{}'.format(ped_dir, fp_name)
        myRange_P = range(0, sequences)
        path = h5path

        print("Reading data from {}".format(fp_path))
        print("Run is: {}".format(r_n))
        print("HDF5 path: {}".format(h5path))

        imageRange = [0, filep_size*len(myRange)]
        reader = JFChunkReader(filename = fp_path, readFun = jfreader.readData, size = filep_size, chunkSize = chunkSize,
                               path = h5path, image_range=imageRange, pixels_x = sensorSize[0], pixels_y = sensorSize[1],
                               x_range = xRange, y_range = yRange, imagesPerChunk=chunkSize, filesRange = myRange,
                               memoryCells=this_run_mcells, blockSize=blockSize)

        for data in reader.readChunks():

            images = np.array(data[0], dtype=np.float)
            gainmaps = np.array(data[1], dtype=np.uint16)
            trainId = np.array(data[2])
            fr_num = np.array(data[3])
            acelltable = np.array(data[4])
            n_tr += acelltable.shape[-1]
            this_tr = acelltable.shape[-1]



            idxs = non_empty_trains(trainId)
            images = images[..., idxs]
            gainmaps = gainmaps[..., idxs]
            fr_num = fr_num[..., idxs]
            acelltable = acelltable[..., idxs]

            if memoryCells == 1:
                acelltable -= sc_start

            n_empty_trains += this_tr - acelltable.shape[-1]
            n_empty_sc += len(acelltable[acelltable > 15])

            if i > 0 and memoryCells == 16: ## throwing away the second memory cell entry in lower gains
                acelltable[1] = 255


            images, gainmaps, acelltable = rollout_data([images, gainmaps, acelltable]) # makes 4-dim vecs into 3-dim
                                                                                        # makes 2-dim into 1-dim
                                                                                        # leaves  1-dim and 3-dim vecs

            images, gainmaps, acelltable = sanitize_data_cellid([images, gainmaps], acelltable) # removes entries with cellID 255
            valid_data.append(images)
            valid_cellids.append(acelltable)

        valid_data = np.concatenate(valid_data, axis=2)
        valid_cellids = np.concatenate(valid_cellids, axis=0)

        for cell in range(memoryCells):

            thiscell = valid_data[...,valid_cellids == cell]
            noise_map[...,cell,gain] = np.std(thiscell, axis=2)
            offset_map[...,cell,gain] = np.mean(thiscell, axis=2)


        print('G{:01d} dark calibration'.format(i))
        print('Missed {:d} out of {:d} trains'.format(n_empty_trains, n_tr))
        print('Lost {:d} images out of {:d}'.format(n_empty_sc, this_run_mcells * (n_tr - n_empty_trains)))

    else:
        print('missing G{:01d}'.format(i))

 ```

 %% Cell type:markdown id: tags:

 ## Offset and Noise Maps ##

 Below offset and noise maps for the high ($g_0$) gain stage are shown, alongside the distribution of these values. One expects block-like structures mapping to the ASICs of the detector

 %% Cell type:code id: tags:

 ``` python
 import matplotlib.pyplot as plt
 from XFELDetAna.core.util import remove_nans
 %matplotlib inline
 #%matplotlib notebook

 from XFELDetAna.plotting.histogram import histPlot
 from XFELDetAna.plotting.heatmap import heatmapPlot

-
 g_name = ['G0', 'G1', 'G2']
 g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
 n_range = [(0., 50.), (0., 50.), (0., 50.)]

 unit = '[ADCu]'
 ```

 %% Cell type:code id: tags:

 ``` python

 for g_idx in gains:
    for cell in range(0, memoryCells):
        f_o0 = heatmapPlot(np.swapaxes(offset_map[..., cell, g_idx], 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="Pedestal {} [ADCu]".format(g_name[g_idx]),
                           aspect=1.,
                           vmin=g_range[g_idx][0],
                           vmax=g_range[g_idx][1])

        fo0, ax_o0 = plt.subplots()
        res_o0 = histPlot(ax_o0, offset_map[..., cell, g_idx],
                          bins=800,
                          range=g_range[g_idx],
                          facecolor='b',
                          histotype='stepfilled')

        ax_o0.tick_params(axis='both',which='major',labelsize=15)
        ax_o0.set_title('Module pedestal distribution Cell {:02d}'.format(cell), fontsize=15)
        ax_o0.set_xlabel('Pedestal {} [ADCu]'.format(g_name[g_idx]),fontsize=15)
        ax_o0.set_yscale('log')

        f_n0 = heatmapPlot(np.swapaxes(noise_map[..., cell, g_idx], 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="RMS noise {} ".format(g_name[g_idx]) + unit,
                           aspect=1.,
                           vmin=n_range[g_idx][0],
                           vmax=n_range[g_idx][1])

        fn0, ax_n0 = plt.subplots()
        res_n0 = histPlot(ax_n0, noise_map[..., cell, g_idx],
                          bins=100,
                          range=n_range[g_idx],
                          facecolor='b',
                          histotype='stepfilled')

        ax_n0.tick_params(axis='both',which='major',labelsize=15)
        ax_n0.set_title('Module noise distribution Cell {:02d}'.format(cell), fontsize=15)
        ax_n0.set_xlabel('RMS noise {} '.format(g_name[g_idx]) + unit,fontsize=15)
        #ax_n0.set_yscale('log')

        plt.show()


 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixel Map ###

 The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:

 $$
 v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
 $$
 or
 $$
 v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
 $$

 Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:

 %% Cell type:code id: tags:

 ``` python
 def print_bp_entry(bp):
    print("{:<30s} {:032b}".format(bp.name, bp.value))

 print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixels_map = np.zeros(noise_map.shape, np.uint32)
 def eval_bpidx(d):

    mdn = np.nanmedian(d, axis=(0, 1, 2))[None, None, None, :]
    std = np.nanstd(d, axis=(0, 1, 2))[None, None, None, :]
    idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)

    return idx

 offset_abs_threshold = np.array(offset_abs_threshold)

 bad_pixels_map[eval_bpidx(offset_map)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
 bad_pixels_map[~np.isfinite(offset_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
 bad_pixels_map[eval_bpidx(noise_map)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
 bad_pixels_map[~np.isfinite(noise_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
 bad_pixels_map[(offset_map < offset_abs_threshold[0][None, None, None, :]) | (offset_map > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value

 for g_idx in gains:
    for cell in range(0, memoryCells):

        bad_pixels = bad_pixels_map[:, :, cell, g_idx]

        fn_0 = heatmapPlot(np.swapaxes(bad_pixels, 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="Badpixels {} [ADCu]".format(g_name[g_idx]),
                           aspect=1.,
                           vmin=0)
 ```

 %% Cell type:code id: tags:

 ``` python
 constants = {'Offset': np.moveaxis(offset_map, 0, 1),
             'Noise': np.moveaxis(noise_map, 0, 1),
             'BadPixelsDark': np.moveaxis(bad_pixels_map, 0, 1)}

 for key, const_data in constants.items():

    metadata = ConstantMetaData()
    const =  getattr(Constants.jungfrau, key)()
    const.data = const_data
    metadata.calibration_constant = const

    # set the operating condition
    condition = Conditions.Dark.jungfrau(memory_cells=memoryCells, bias_voltage=bias_voltage,
                                         integration_time=integration_time)

    for parm in condition.parameters:
        if parm.name == "Integration Time":
            parm.lower_deviation = time_limits
            parm.upper_deviation = time_limits

    device = getattr(Detectors, db_module)
    metadata.detector_condition = condition

    # specify the version for this constant
    if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
    else:
        metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                                  start=creation_time)

    metadata.calibration_constant_version.raw_data_location = file_loc
    if db_output:
-        metadata.send(cal_db_interface)
+        metadata.send(cal_db_interface, timeout=cal_db_timeout)
        print('Constants {} is sent to the data base'.format(key))

    # save everything to file.
    # one file - one entrt in the DB
    if local_output:
        dpar = {}
        for parm in metadata.detector_condition.parameters:
            dpar[parm.name] = {'lower_deviation': parm.lower_deviation,
                               'upper_deviation': parm.upper_deviation,
                               'value': parm.value}

        data_to_store = {}
        data_to_store['condition'] = dpar
        data_to_store['db_module'] = db_module
        data_to_store['constant'] = key
        data_to_store['data'] = const_data
        data_to_store['creation_time'] = creation_time
        data_to_store['dclass'] = 'jungfrau'
        data_to_store['file_loc'] = file_loc

        ofile = "{}/const_{}_{}.h5".format(out_folder, key, db_module)
        save_dict_to_hdf5(data_to_store, ofile)
 ```

 %% Cell type:markdown id: tags:

 # Jungfrau Dark Image Characterization #

 Version: 0.1, Author: M. Ramilli, S. Hauf

 Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps

 %% Cell type:code id: tags:

 ``` python
-in_folder = '/gpfs/exfel/exp/FXE/201931/p900088/raw/'  # folder under which runs are located, required
+cluster_profile = 'noDB'  # the ipcluster profile name
+in_folder = '/gpfs/exfel/exp/FXE/201931/p900089/raw/'  # folder under which runs are located, required
 out_folder = '/gpfs/exfel/data/scratch/karnem/test_dark/' # path to place reports at, required
 sequences = 1  # number of sequence files in that run
+run_high = 86 # run number for G0 dark run, required
+run_med = 87 # run number for G1 dark run, required
+run_low = 88 # run number for G2 dark run, required
+
+karabo_da = ['JNGFR01'] # list of data aggregators, which corresponds to different JF modules
+karabo_id = "FXE_XAD_JF1M"  # bla karabo prefix of Jungfrau devices
+karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id
+receiver_id = 'RECEIVER-{}' # inset for receiver devices
+receiver_control_id = "CONTROL" # inset for control devices
 path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
-path_inset = "JNGFR02"  # file inset for image data
-path_inset_control = "JNGFR02" # file inset for control data
-cluster_profile = 'noDB'  # the ipcluster profile name
+h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
+h5path_run = '/RUN/{}/DET/{}' # path to run data
+h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
+karabo_da_control = "JNGFR01" # file inset for control data
+
+use_dir_creation_date = True # use dir creation date
 cal_db_interface = 'tcp://max-exfl016:8016'  # calibrate db interface to connect to
+cal_db_timeout = 300000 # timeout on caldb requests
+local_output = True # output constants locally
+db_output = False # output constants to database
+
 integration_time = 1000 # integration time in us, will be overwritten by value in file
 bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file
 badpixel_threshold_sigma = 20.  # bad pixels defined by values outside n times this std from median
 offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values
 offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values
 chunkSize = 10  # iteration chunk size, needs to match or be less than number of images in a sequence file
 imageRange = [0, 500]  # image range in which to evaluate
 memoryCells = 16  # number of memory cells
-h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
-run_high = 144 # run number for G0 dark run, required
-run_med = 145 # run number for G1 dark run, required
-run_low = 146 # run number for G2 dark run, required
-karabo_id = "FXE_XAD_JF500K"  # karabo prefix of Jungfrau devices
-karabo_id_control = ""  # if control is on a different ID, set to empty string for using the same as for data
-receiver_id = "RECEIVER" # inset for receiver devices
-control_id = "CONTROL" # inset for control devices
-db_module = "Jungfrau_M260" # ID of module in calibration database
-use_dir_creation_date = True # use dir creation date
+db_module = ["Jungfrau_M260"] # ID of module in calibration database
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 time_limits = 0.025 #  to find calibration constants later on, the integration time is allowed to vary by 0.5 us
-local_output = True # output constants locally
-db_output = False # output constants to database
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 import matplotlib
 matplotlib.use('agg')
 import numpy as np
 import h5py
 from h5py import File as h5file

 from cal_tools.enums import BadPixels
 from cal_tools.tools import get_dir_creation_date, get_random_db_interface
 from cal_tools.ana_tools import save_dict_to_hdf5
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 from XFELDetAna.util import env
 env.iprofile = cluster_profile

 from XFELDetAna.detectors.jungfrau import readerPSI as jfreaderPSI
 from XFELDetAna.detectors.jungfrau import reader as jfreader
 from XFELDetAna.detectors.jungfrau.jf_chunk_reader import JFChunkReader
 from XFELDetAna.detectors.jungfrau.util import non_empty_trains, count_n_files, rollout_data, sanitize_data_cellid
 import glob
 import matplotlib.pyplot as plt
 %matplotlib inline

 from XFELDetAna.plotting.histogram import histPlot
 from XFELDetAna.plotting.heatmap import heatmapPlot

+import os
+os.makedirs(out_folder, exist_ok=True)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+path_inset = karabo_da[0] # karabo_da is a concurrency parameter
+receiver_id = receiver_id.format(int(path_inset[-2:]))
+proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
+file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_high, run_med, run_low)
+
+# TODO
+# this trick is needed until proper mapping is introduced
+if len(db_module)>1:
+    db_module = db_module[int(path_inset[-2:])-1]
+else:
+    db_module = db_module[0]
+
 # Constants relevant for the analysis
 run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
 sensorSize = [1024, 512]
 blockSize = [1024, 512]
 xRange = [0, 0+sensorSize[0]]
 yRange = [0, 0+sensorSize[1]]
 gains = [0, 1, 2]
 h5path = h5path.format(karabo_id, receiver_id)

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
    print("Using {} as creation time".format(creation_time))

 cal_db_interface = get_random_db_interface(cal_db_interface)
 print('Calibration database interface: {}'.format(cal_db_interface))

 offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]

 if karabo_id_control == "":
    karabo_id_control = karabo_id

-import os
-os.makedirs(out_folder, exist_ok=True)
-```
-
-%% Cell type:code id: tags:
-
-``` python
-proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
-file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_high, run_med, run_low)
+print('Path inset ', path_inset)
+print('Receiver Id ', receiver_id)
 ```

 %% Cell type:code id: tags:

 ``` python
 def check_memoryCells(file_name, path):
    with h5file(file_name, 'r') as f:
        t_stamp = np.array(f[path + '/storageCells/timestamp'])
        st_cells = np.array(f[path + '/storageCells/value'])
        sc_start = np.array(f[path + '/storageCellStart/value'])

    valid_train = t_stamp > 0
    n_scs = st_cells[valid_train][0] + 1
    sc_s = sc_start[valid_train][0]

    return n_scs, sc_s
 ```

 %% Cell type:code id: tags:

 ``` python
 chunkSize = 100
 filep_size = 1000
 noiseCal = None
 noise_map = None
 offset_map = None
 memoryCells = None
 for i, r_n in enumerate(run_nums):

    gain = i
    print(f"Gain stage {gain}, run {r_n}")
    valid_data = []
    valid_cellids = []
    if r_n is not None:
        n_tr = 0
        n_empty_trains = 0
        n_empty_sc = 0

        ped_dir = "{}/r{:04d}/".format(in_folder, r_n)
-        fp_name = path_template.format(r_n, path_inset_control)
+        fp_name = path_template.format(r_n, karabo_da_control)
        fp_path = '{}/{}'.format(ped_dir, fp_name)

        n_files = len(glob.glob("{}/*{}*.h5".format(ped_dir, path_inset)))
        myRange = range(0, n_files)
-        control_path = '/CONTROL/{}/DET/{}'.format(karabo_id_control, control_id)
+        control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)

        this_run_mcells, sc_start = check_memoryCells(fp_path.format(0).format(myRange[0]), control_path)

        if noise_map is None:
            if not manual_slow_data:
                with h5py.File(fp_path.format(0), 'r') as f:
-                    integration_time = float(f['/RUN/{}/DET/{}/exposureTime/value'.format(karabo_id_control, control_id)][()]*1e6)
-                    bias_voltage = int(np.squeeze(f['/RUN/{}/DET/{}/vHighVoltage/value'.format(karabo_id_control, control_id)])[0])
+                    run_path = h5path_run.format(karabo_id_control, receiver_control_id)
+                    integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
+                    bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])
            print("Integration time is {} us".format(integration_time))
            print("Bias voltage is {} V".format(bias_voltage))
            if this_run_mcells == 1:
                memoryCells = 1
                print('Dark runs in single cell mode\n storage cell start: {:02d}'.format(sc_start))
            else:
                memoryCells = 16
                print('Dark runs in burst mode\n storage cell start: {:02d}'.format(sc_start))

            noise_map = np.zeros(sensorSize+[memoryCells, 3])
            offset_map = np.zeros(sensorSize+[memoryCells, 3])

        fp_name = path_template.format(r_n, path_inset)
        fp_path = '{}/{}'.format(ped_dir, fp_name)
        myRange_P = range(0, sequences)
        path = h5path

        print("Reading data from {}".format(fp_path))
        print("Run is: {}".format(r_n))
        print("HDF5 path: {}".format(h5path))

        imageRange = [0, filep_size*len(myRange)]
        reader = JFChunkReader(filename = fp_path, readFun = jfreader.readData, size = filep_size, chunkSize = chunkSize,
                               path = h5path, image_range=imageRange, pixels_x = sensorSize[0], pixels_y = sensorSize[1],
                               x_range = xRange, y_range = yRange, imagesPerChunk=chunkSize, filesRange = myRange,
                               memoryCells=this_run_mcells, blockSize=blockSize)

        for data in reader.readChunks():

            images = np.array(data[0], dtype=np.float)
            gainmaps = np.array(data[1], dtype=np.uint16)
            trainId = np.array(data[2])
            fr_num = np.array(data[3])
            acelltable = np.array(data[4])
            n_tr += acelltable.shape[-1]
            this_tr = acelltable.shape[-1]



            idxs = non_empty_trains(trainId)
            images = images[..., idxs]
            gainmaps = gainmaps[..., idxs]
            fr_num = fr_num[..., idxs]
            acelltable = acelltable[..., idxs]

            if memoryCells == 1:
                acelltable -= sc_start

            n_empty_trains += this_tr - acelltable.shape[-1]
            n_empty_sc += len(acelltable[acelltable > 15])

            if i > 0 and memoryCells == 16: ## throwing away the second memory cell entry in lower gains
                acelltable[1] = 255


            images, gainmaps, acelltable = rollout_data([images, gainmaps, acelltable]) # makes 4-dim vecs into 3-dim
                                                                                        # makes 2-dim into 1-dim
                                                                                        # leaves  1-dim and 3-dim vecs

            images, gainmaps, acelltable = sanitize_data_cellid([images, gainmaps], acelltable) # removes entries with cellID 255
            valid_data.append(images)
            valid_cellids.append(acelltable)

        valid_data = np.concatenate(valid_data, axis=2)
        valid_cellids = np.concatenate(valid_cellids, axis=0)

        for cell in range(memoryCells):

            thiscell = valid_data[...,valid_cellids == cell]
            noise_map[...,cell,gain] = np.std(thiscell, axis=2)
            offset_map[...,cell,gain] = np.mean(thiscell, axis=2)


        print('G{:01d} dark calibration'.format(i))
        print('Missed {:d} out of {:d} trains'.format(n_empty_trains, n_tr))
        print('Lost {:d} images out of {:d}'.format(n_empty_sc, this_run_mcells * (n_tr - n_empty_trains)))

    else:
        print('missing G{:01d}'.format(i))

 ```

 %% Cell type:markdown id: tags:

 ## Offset and Noise Maps ##

 Below offset and noise maps for the high ($g_0$) gain stage are shown, alongside the distribution of these values. One expects block-like structures mapping to the ASICs of the detector

 %% Cell type:code id: tags:

 ``` python
 import matplotlib.pyplot as plt
 from XFELDetAna.core.util import remove_nans
 %matplotlib inline
 #%matplotlib notebook

 from XFELDetAna.plotting.histogram import histPlot
 from XFELDetAna.plotting.heatmap import heatmapPlot

-
 g_name = ['G0', 'G1', 'G2']
 g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
 n_range = [(0., 50.), (0., 50.), (0., 50.)]

 unit = '[ADCu]'
 ```

 %% Cell type:code id: tags:

 ``` python

 for g_idx in gains:
    for cell in range(0, memoryCells):
        f_o0 = heatmapPlot(np.swapaxes(offset_map[..., cell, g_idx], 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="Pedestal {} [ADCu]".format(g_name[g_idx]),
                           aspect=1.,
                           vmin=g_range[g_idx][0],
                           vmax=g_range[g_idx][1])

        fo0, ax_o0 = plt.subplots()
        res_o0 = histPlot(ax_o0, offset_map[..., cell, g_idx],
                          bins=800,
                          range=g_range[g_idx],
                          facecolor='b',
                          histotype='stepfilled')

        ax_o0.tick_params(axis='both',which='major',labelsize=15)
        ax_o0.set_title('Module pedestal distribution Cell {:02d}'.format(cell), fontsize=15)
        ax_o0.set_xlabel('Pedestal {} [ADCu]'.format(g_name[g_idx]),fontsize=15)
        ax_o0.set_yscale('log')

        f_n0 = heatmapPlot(np.swapaxes(noise_map[..., cell, g_idx], 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="RMS noise {} ".format(g_name[g_idx]) + unit,
                           aspect=1.,
                           vmin=n_range[g_idx][0],
                           vmax=n_range[g_idx][1])

        fn0, ax_n0 = plt.subplots()
        res_n0 = histPlot(ax_n0, noise_map[..., cell, g_idx],
                          bins=100,
                          range=n_range[g_idx],
                          facecolor='b',
                          histotype='stepfilled')

        ax_n0.tick_params(axis='both',which='major',labelsize=15)
        ax_n0.set_title('Module noise distribution Cell {:02d}'.format(cell), fontsize=15)
        ax_n0.set_xlabel('RMS noise {} '.format(g_name[g_idx]) + unit,fontsize=15)
        #ax_n0.set_yscale('log')

        plt.show()


 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixel Map ###

 The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:

 $$
 v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
 $$
 or
 $$
 v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
 $$

 Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:

 %% Cell type:code id: tags:

 ``` python
 def print_bp_entry(bp):
    print("{:<30s} {:032b}".format(bp.name, bp.value))

 print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixels_map = np.zeros(noise_map.shape, np.uint32)
 def eval_bpidx(d):

    mdn = np.nanmedian(d, axis=(0, 1, 2))[None, None, None, :]
    std = np.nanstd(d, axis=(0, 1, 2))[None, None, None, :]
    idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)

    return idx

 offset_abs_threshold = np.array(offset_abs_threshold)

 bad_pixels_map[eval_bpidx(offset_map)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
 bad_pixels_map[~np.isfinite(offset_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
 bad_pixels_map[eval_bpidx(noise_map)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
 bad_pixels_map[~np.isfinite(noise_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
 bad_pixels_map[(offset_map < offset_abs_threshold[0][None, None, None, :]) | (offset_map > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value

 for g_idx in gains:
    for cell in range(0, memoryCells):

        bad_pixels = bad_pixels_map[:, :, cell, g_idx]

        fn_0 = heatmapPlot(np.swapaxes(bad_pixels, 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="Badpixels {} [ADCu]".format(g_name[g_idx]),
                           aspect=1.,
                           vmin=0)
 ```

 %% Cell type:code id: tags:

 ``` python
 constants = {'Offset': np.moveaxis(offset_map, 0, 1),
             'Noise': np.moveaxis(noise_map, 0, 1),
             'BadPixelsDark': np.moveaxis(bad_pixels_map, 0, 1)}

 for key, const_data in constants.items():

    metadata = ConstantMetaData()
    const =  getattr(Constants.jungfrau, key)()
    const.data = const_data
    metadata.calibration_constant = const

    # set the operating condition
    condition = Conditions.Dark.jungfrau(memory_cells=memoryCells, bias_voltage=bias_voltage,
                                         integration_time=integration_time)

    for parm in condition.parameters:
        if parm.name == "Integration Time":
            parm.lower_deviation = time_limits
            parm.upper_deviation = time_limits

    device = getattr(Detectors, db_module)
    metadata.detector_condition = condition

    # specify the version for this constant
    if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
    else:
        metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                                  start=creation_time)

    metadata.calibration_constant_version.raw_data_location = file_loc
    if db_output:
-        metadata.send(cal_db_interface)
+        metadata.send(cal_db_interface, timeout=cal_db_timeout)
        print('Constants {} is sent to the data base'.format(key))

    # save everything to file.
    # one file - one entrt in the DB
    if local_output:
        dpar = {}
        for parm in metadata.detector_condition.parameters:
            dpar[parm.name] = {'lower_deviation': parm.lower_deviation,
                               'upper_deviation': parm.upper_deviation,
                               'value': parm.value}

        data_to_store = {}
        data_to_store['condition'] = dpar
        data_to_store['db_module'] = db_module
        data_to_store['constant'] = key
        data_to_store['data'] = const_data
        data_to_store['creation_time'] = creation_time
        data_to_store['dclass'] = 'jungfrau'
        data_to_store['file_loc'] = file_loc

        ofile = "{}/const_{}_{}.h5".format(out_folder, key, db_module)
        save_dict_to_hdf5(data_to_store, ofile)
 ```

--- a/requirements.txt
+++ b/requirements.txt
@@ -20,7 +20,7 @@ jupyter
 jupyter_console
 metadata_client
 sphinx == 1.4.5
-dill
+dill == 0.3.0
 pyyaml
 ipyparallel
 iminuit
No results found