Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: calibration/pycalibration

Commits on Source (62)
Showing with 554 additions and 297 deletions
......@@ -43,9 +43,14 @@ then
sleep 15
fi
echo "Running notebook"
if [ "$caltype" == "CORRECT" ]
then
# calparrot stores and repeats calcat queries
${python_path} -m calparrot -- ${python_path} -m princess ${nb_path} --save
else
${python_path} -m princess ${nb_path} --save
fi
# stop the cluster if requested
if [ "${ipcluster_profile}" != "NO_CLUSTER" ]
......
%% Cell type:markdown id:bed7bd15-21d9-4735-82c1-c27c1a5e3346 tags:
# Gotthard2 Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the Gotthard2 Detector
%% Cell type:code id:570322ed-f611-4fd1-b2ec-c12c13d55843 tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202221/p003225/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 50 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per node if notebook executed through xfel-calibrate, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "FXE_XAD_G2XES" # karabo prefix of Gotthard-II devices
karabo_da = ["GH201"] # data aggregators
receiver_template = "RECEIVER" # receiver template used to read INSTRUMENT keys.
control_template = "CONTROL" # control template used to read CONTROL keys.
instrument_source_template = "{}/DET/{}:daqOutput" # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = "{}/DET/{}" # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation date of the input dir for database queries.
cal_db_interface = "tcp://max-exfl016:8016#8025" # the database interface to use.
cal_db_timeout = 180000 # timeout on caldb requests.
overwrite_creation_time = "" # To overwrite the measured creation_time. Required format: YYYY-MM-DD HH:MM:SS.00 e.g. "2022-06-28 13:00:00.00"
# Parameters affecting corrected data.
constants_file = "" # Use constants in given constant file path. /gpfs/exfel/data/scratch/ahmedk/dont_remove/gotthard2/constants/calibration_constants_GH2.h5
offset_correction = True # apply offset correction. This can be disabled to only apply LUT or apply LUT and gain correction for non-linear differential results.
gain_correction = True # apply gain correction.
# Parameter conditions.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
# Parameters for plotting
skip_plots = False # exit after writing corrected files
pulse_idx_preview = 3 # pulse index to preview. The following even/odd pulse index is used for preview. # TODO: update to pulseId preview.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
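%% Cell type:markdown id: tags:
The `balance_sequences` hook above is what xfel-calibrate calls to split sequence files across SLURM nodes. A minimal sketch of the idea, assuming a simple chunked split; the real `xfel_calibrate.calibrate.balance_sequences` may differ:
%% Cell type:code id: tags:
``` python
# Hypothetical illustration only: chunk sequence numbers into groups of
# `sequences_per_node`, one group per SLURM job.
def split_sequences(sequences, sequences_per_node):
    return [
        sequences[i:i + sequences_per_node]
        for i in range(0, len(sequences), sequences_per_node)
    ]

print(split_sequences(list(range(5)), 2))  # [[0, 1], [2, 3], [4]]
```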
%% Cell type:code id:6e9730d8-3908-41d7-abe2-d78e046d5de2 tags:
``` python
import datetime
import warnings
from functools import partial
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
from extra_data import RunDirectory, H5File
from pathlib import Path
from cal_tools import h5_copy_except
from cal_tools.gotthard2 import gotthard2algs, gotthard2lib
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
from XFELDetAna.plotting.heatmap import heatmapPlot
warnings.filterwarnings('ignore')
%matplotlib inline
```
%% Cell type:code id:d7c02c48-4429-42ea-a42e-de45366d7fa3 tags:
``` python
in_folder = Path(in_folder)
run_folder = in_folder / f"r{run:04d}"
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite the calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
if not karabo_id_control:
karabo_id_control = karabo_id
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
ctrl_src = ctrl_source_template.format(karabo_id_control, control_template)
print(f"Process modules: {karabo_da} for run {run}")
creation_time = None
if overwrite_creation_time:
creation_time = datetime.datetime.strptime(
overwrite_creation_time, "%Y-%m-%d %H:%M:%S.%f"
)
elif use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
```
%% Cell type:code id:b5eb816e-b5f2-44ce-9907-0273d82341b6 tags:
``` python
# Select only sequence files to process for the selected detector.
if sequences == [-1]:
possible_patterns = list(f"*{mod}*.h5" for mod in karabo_da)
else:
possible_patterns = list(
f"*{mod}-S{s:05d}.h5" for mod in karabo_da for s in sequences
)
run_folder = Path(in_folder / f"r{run:04d}")
seq_files = [
f for f in run_folder.glob("*.h5") if any(f.match(p) for p in possible_patterns)
]
seq_files = sorted(seq_files)
if not seq_files:
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id:f9a8d1eb-ce6a-4ed0-abf4-4a6029734672 tags:
``` python
step_timer = StepTimer()
```
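%% Cell type:markdown id: tags:
`StepTimer` is used throughout this notebook with a start()/done_step() pattern; a minimal usage sketch based only on the calls that appear in this notebook:
%% Cell type:code id: tags:
``` python
from cal_tools.step_timing import StepTimer

timer = StepTimer()
timer.start()                    # begin timing a step
# ... some work ...
timer.done_step("example step")  # record and label the elapsed time
print(f"Total processing time {timer.timespan():.01f} s")
timer.print_summary()
```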
%% Cell type:code id:892172d8 tags:
``` python
# Read slow data
run_dc = RunDirectory(run_folder)
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
if bias_voltage == -1:
bias_voltage = g2ctrl.get_bias_voltage()
if exposure_time == -1:
exposure_time = g2ctrl.get_exposure_time()
if exposure_period == -1:
exposure_period = g2ctrl.get_exposure_period()
if acquisition_rate == -1:
acquisition_rate = g2ctrl.get_acquisition_rate()
if single_photon == -1:
single_photon = g2ctrl.get_single_photon()
print("Bias Voltage:", bias_voltage)
print("Exposure Time:", exposure_time)
print("Exposure Period:", exposure_period)
print("Acquisition Rate:", acquisition_rate)
print("Single Photon:", single_photon)
```
%% Cell type:markdown id:8c852392-bb19-4c40-b2ce-3b787538a92d tags:
### Retrieving calibration constants
%% Cell type:code id:5717d722 tags:
``` python
# Used for old FXE (p003225) runs before adding Gotthard2 to CALCAT
const_data = dict()
if constants_file:
for mod in karabo_da:
const_data[mod] = dict()
# load constants temporarily using defined local paths.
with h5py.File(constants_file, "r") as cfile:
const_data[mod]["LUT"] = cfile["LUT"][()]
const_data[mod]["Offset"] = cfile["offset_map"][()].astype(np.float32)
const_data[mod]["RelativeGain"] = cfile["gain_map"][()].astype(np.float32)
const_data[mod]["Mask"] = cfile["bpix_ff"][()].astype(np.uint32)
```
%% Cell type:code id:1cdbe818 tags:
``` python
# Conditions iCalibrationDB object.
condition = Conditions.Dark.Gotthard2(
bias_voltage=bias_voltage,
exposure_time=exposure_time,
exposure_period=exposure_period,
single_photon=single_photon,
acquisition_rate=acquisition_rate,
)
# TODO: Maybe this condition and previous cell can be removed later after the initial phase.
if not constants_file:
    # Prepare a dictionary of empty constants to loop over
    # its keys and initiate non-retrieved constants.
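    # Identity LUT: map each 12-bit ADC code to its 10-bit equivalent
    # (scaled by 2**10 / 2**12), replicated for 1280 strips x 2 (even/odd).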
empty_lut = (np.arange(2 ** 12).astype(np.float64) * 2 ** 10 / 2 ** 12).astype(
np.uint16
)
empty_lut = np.stack(1280 * [np.stack([empty_lut] * 2)], axis=0)
empty_constants = {
"LUT": empty_lut,
"Offset": np.zeros((1280, 2, 3), dtype=np.float32),
"BadPixelsDark": np.zeros((1280, 2, 3), dtype=np.uint32),
"RelativeGain": np.ones((1280, 2, 3), dtype=np.float32),
"BadPixelsFF": np.zeros((1280, 2, 3), dtype=np.uint32),
}
for mod in karabo_da:
const_data[mod] = dict()
# Only used for printing timestamps within the loop.
when = dict()
# Check YAML file for constant metadata of file path and creation-time
if const_yaml:
for cname, mdata in const_yaml[mod]["constants"].items():
const_data[mod][cname] = dict()
when[cname] = mdata["creation-time"]
if when[cname]:
with h5py.File(mdata["file-path"], "r") as cf:
const_data[mod][cname] = np.copy(
cf[f"{mdata['dataset-name']}/data"]
)
else:
const_data[mod][cname] = empty_constants[cname]
else: # Retrieve constants from CALCAT. Missing YAML file or running notebook interactively.
for cname, cempty in empty_constants.items():
const_data[mod][cname] = dict()
const_data[mod][cname], when[cname] = get_constant_from_db_and_time(
karabo_id=karabo_id,
karabo_da=mod,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
condition=condition,
constant=getattr(Constants.Gotthard2, cname)(),
empty_constant=cempty,
)
bpix = const_data[mod]["BadPixelsDark"]
bpix |= const_data[mod]["BadPixelsFF"]
const_data[mod]["Mask"] = bpix
# Print timestamps for the retrieved constants.
print(f"Constants for module {mod}:")
for cname, ctime in when.items():
print(f" {cname} injected at {ctime}")
del when
```
%% Cell type:code id:23fcf7f4-351a-4df7-8829-d8497d94fecc tags:
``` python
context = psh.ProcessContext(num_workers=23)
```
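%% Cell type:markdown id: tags:
The pasha `ProcessContext` used here follows an allocate-then-map pattern: shared output arrays are allocated first, then a kernel is mapped over the first axis of the input across worker processes. A minimal self-contained sketch of that pattern as used in this notebook:
%% Cell type:code id: tags:
``` python
import numpy as np
import pasha as psh

ctx = psh.ProcessContext(num_workers=4)
inp = np.random.rand(10, 1280, 2)                   # e.g. [trains, strips, cells]
out = ctx.alloc(shape=inp.shape, dtype=np.float32)  # shared between workers

def kernel(wid, index, train_data):
    # wid: worker id; index: position along the mapped (train) axis.
    out[index] = train_data * 2.0

ctx.map(kernel, inp)
```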
%% Cell type:code id:daecd662-26d2-4cb8-aa70-383a579cf9f9 tags:
``` python
def correct_train(wid, index, d):
    g = gain[index]
    gotthard2algs.convert_to_10bit(d, const_data[mod]["LUT"], data_corr[index, ...])
    gotthard2algs.correct_train(
        data_corr[index, ...],
        mask[index, ...],
        g,
        const_data[mod]["Offset"],
        const_data[mod]["RelativeGain"].astype(np.float32, copy=False),
        const_data[mod]["Mask"],
        apply_offset=offset_correction,
        apply_gain=gain_correction,
    )
```
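%% Cell type:markdown id: tags:
`gotthard2algs.convert_to_10bit` applies the per-strip look-up table before the offset/gain steps. A hedged numpy sketch of the concept, assuming the LUT shape `[1280 strips, 2 (even/odd pulse), 4096 codes]` implied by the empty-LUT construction above (illustrative only, not the actual `gotthard2algs` implementation):
%% Cell type:code id: tags:
``` python
import numpy as np

def convert_to_10bit_sketch(raw, lut, out):
    """raw: [pulses, 1280] 12-bit codes; lut: [1280, 2, 4096]; out: like raw."""
    strips = np.arange(raw.shape[1])
    for pulse in range(raw.shape[0]):
        # Even and odd pulses are digitized by different ADC groups,
        # hence the second LUT axis.
        out[pulse] = lut[strips, pulse % 2, raw[pulse]]
```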
%% Cell type:code id:f88c1aa6-a735-4b72-adce-b30162f5daea tags:
``` python
for mod in karabo_da:
    # This is used in case the receiver template contains the
    # karabo data aggregator index, e.g. detector at DETLAB.
instr_mod_src = instrument_src.format(mod[-2:])
data_path = "INSTRUMENT/" + instr_mod_src + "/data"
for raw_file in seq_files:
step_timer.start()
dc = H5File(raw_file)
out_file = out_folder / raw_file.name.replace("RAW", "CORR")
# Select module INSTRUMENT source and deselect empty trains.
dc = dc.select(instr_mod_src, require_all=True)
data = dc[instr_mod_src, "data.adc"].ndarray()
gain = dc[instr_mod_src, "data.gain"].ndarray()
step_timer.done_step("preparing raw data")
dshape = data.shape
step_timer.start()
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask = context.alloc(shape=dshape, dtype=np.uint32)
context.map(correct_train, data)
step_timer.done_step("Correcting one sequence file")
step_timer.start()
        # The provided PSI gain map contains 0 values; division makes them inf,
        # so set inf values to NaN.
        # TODO: This can maybe be removed after creating XFEL gain maps?
data_corr[np.isinf(data_corr)] = np.nan
# Create CORR files and add corrected data sources.
# Exclude raw data images (data/adc)
with h5py.File(out_file, "w") as ofile:
# Copy RAW non-calibrated sources.
with h5py.File(raw_file, "r") as sfile:
h5_copy_except.h5_copy_except_paths(sfile, ofile, [f"{data_path}/adc"])
# Create datasets with the available corrected data
ddset = ofile.create_dataset(
f"{data_path}/adc",
data=data_corr,
chunks=((1,) + dshape[1:]), # 1 chunk == 1 image
dtype=np.float32,
)
            # Create a dataset with the corresponding mask data
ddset = ofile.create_dataset(
f"{data_path}/mask",
data=mask,
chunks=((1,) + dshape[1:]), # 1 chunk == 1 image
dtype=np.uint32,
compression="gzip",
compression_opts=1,
shuffle=True,
)
step_timer.done_step("Storing data")
```
%% Cell type:code id:94b8e4d2-9f8c-4c23-a509-39238dd8435c tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id:0ccc7f7e-2a3f-4ac0-b854-7d505410d2fd tags:
``` python
if skip_plots:
print("Skipping plots")
import sys
sys.exit(0)
```
%% Cell type:code id:ff203f77-3811-46f3-bf7d-226d2dcab13f tags:
``` python
mod_dcs = {}
first_seq_raw = seq_files[0]
first_seq_corr = out_folder / first_seq_raw.name.replace("RAW", "CORR")
for mod in karabo_da:
mod_dcs[mod] = {}
with H5File(first_seq_corr) as out_dc:
tid, mod_dcs[mod]["train_corr_data"] = next(
out_dc[instr_mod_src, "data.adc"].trains()
)
with H5File(first_seq_raw) as in_dc:
train_dict = in_dc.train_from_id(tid)[1][instr_mod_src]
mod_dcs[mod]["train_raw_data"] = train_dict["data.adc"]
mod_dcs[mod]["train_raw_gain"] = train_dict["data.gain"]
```
%% Cell type:code id:494b453a tags:
``` python
# Keep as long as it is essential to correct
# RAW data (FXE p003225) before the data mapping was added to CALCAT.
try:
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time,
)
except RuntimeError:
print(
"No Physical detector units found for this"
" detector mapping at the RAW data creation time."
)
db_modules = [None] * len(karabo_da)
```
%% Cell type:code id:1b379438-eb1d-42b2-ac83-eb8cf88c46db tags:
``` python
display(Markdown("### Mean RAW and CORRECTED across pulses for one train:"))
display(Markdown(f"Train: {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 10))
raw_data = mod_dcs[mod]["train_raw_data"]
im = ax.plot(np.mean(raw_data, axis=0))
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
fig, ax = plt.subplots(figsize=(20, 10))
corr_data = mod_dcs[mod]["train_corr_data"]
im = ax.plot(np.mean(corr_data, axis=0))
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV. output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
step_timer.done_step("Plotting mean data")
```
%% Cell type:code id:58a6a276 tags:
``` python
display(Markdown(f"### RAW and CORRECTED strips across pulses for train {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
for plt_data, dname in zip(
["train_raw_data", "train_corr_data"], ["RAW", "CORRECTED"]
):
fig, ax = plt.subplots(figsize=(15, 20))
plt.rcParams.update({"font.size": 20})
heatmapPlot(
mod_dcs[mod][plt_data],
y_label="Pulses",
x_label="Strips",
title=f"{dname} module {mod} ({pdu})",
use_axis=ax,
)
pass
step_timer.done_step("Plotting RAW and CORRECTED data for one train")
```
%% Cell type:code id:cd8f5e08-fcee-4bff-ba63-6452b3d892a2 tags:
``` python
# Validate given "pulse_idx_preview"
if pulse_idx_preview + 1 > data.shape[1]:
print(
f"WARNING: selected pulse_idx_preview {pulse_idx_preview} is not available in data."
" Previewing 1st pulse."
)
pulse_idx_preview = 1
if data.shape[1] == 1:
odd_pulse = 1
even_pulse = None
else:
odd_pulse = pulse_idx_preview if pulse_idx_preview % 2 else pulse_idx_preview + 1
even_pulse = (
pulse_idx_preview if not (pulse_idx_preview % 2) else pulse_idx_preview + 1
)
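# Worked check (illustrative): pulse_idx_preview = 3 -> odd_pulse = 3, even_pulse = 4;
# pulse_idx_preview = 2 -> odd_pulse = 3, even_pulse = 2.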
```
%% Cell type:code id:e5f0d4d8-e32c-4f2c-8469-4ebbfd3f644c tags:
``` python
display(Markdown("### RAW and CORRECTED even/odd pulses for one train:"))
display(Markdown(f"Train: {tid}"))
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 20))
raw_data = mod_dcs[mod]["train_raw_data"]
corr_data = mod_dcs[mod]["train_corr_data"]
ax.plot(raw_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(raw_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC RAW", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
fig, ax = plt.subplots(figsize=(20, 20))
ax.plot(corr_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(corr_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV CORRECTED", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
step_timer.done_step("Plotting RAW and CORRECTED odd/even pulses.")
```
......
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 91 # run to process, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID, set to empty string if it is the same a karabo-id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
relative_gain = True # do relative gain correction.
strixel_sensor = False # reordering for strixel detector layout.
strixel_double_norm = 2.0 # normalization to use for double-size pixels, only applied for strixel sensors.
limit_trains = 0 # ONLY FOR TESTING. Process only the first N trains; use 0 to process all.
chunks_ids = 32 # HDF chunk size for memoryCell and frameNumber.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by the value in the file unless manual_slow_data is set to True.
mem_cells = -1 # Set mem_cells to -1 to automatically use the value stored in RAW data.
bias_voltage = 180 # will be overwritten by value in file
# Parameters for plotting
skip_plots = False # exit after writing corrected files
plot_trains = 500 # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
cell_id_preview = 15 # cell Id used for preview in single-shot plots
# Parameters for ROI selection and reduction
roi_definitions = [-1] # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
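%% Cell type:markdown id: tags:
As a worked example of the flat `roi_definitions` layout (values are illustrative): each consecutive group of six entries is one ROI, unpacked the same way as in `save_reduced_rois` further below.
%% Cell type:code id: tags:
``` python
# [module_no, row_start, row_stop, col_start, col_stop, mean_axis]
example_rois = [3, 120, 180, 200, 550, -2]
for i in range(len(example_rois) // 6):
    module_no, a1, a2, b1, b2, mean_axis = example_rois[i*6:(i+1)*6]
    # -> module 3 (JNGFR03), slice [120:180, 200:550], averaged along axis -2
    print(module_no, (a1, a2), (b1, b2), mean_axis)
```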
%% Cell type:code id: tags:
``` python
import multiprocessing
import sys
import warnings
from functools import partial
from logging import warning
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory, by_id, components
from extra_geom import JUNGFRAUGeometry
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.files import DataFile
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
map_seq_files,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
run_dc = RunDirectory(run_folder)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
out_folder.mkdir(parents=True, exist_ok=True)
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
f"Axis numbers given: {roi_definitions[5::6]}")
sys.exit(1)
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
run_folder, karabo_da, sequences)
if not len(mapped_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
for kda, sfiles in mapped_files.items():
for k, f in enumerate(sfiles):
if k == 0:
table.append((fi, kda, k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
if mem_cells < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
else:
memory_cells = mem_cells
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:code id: tags:
``` python
if strixel_sensor:
from cal_tools.jfstrixel import STRIXEL_SHAPE as strixel_frame_shape, double_pixel_indices, to_strixel
Ydouble, Xdouble = double_pixel_indices()
print('Strixel sensor transformation enabled')
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
empty_constants = {
"Offset": np.zeros((512, 1024, memory_cells, 3), dtype=np.float32),
"BadPixelsDark": np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
"RelativeGain": None,
"BadPixelsFF": None,
}
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite the calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def get_constants_for_module(karabo_da: str):
""" Get calibration constants for given module of Jungfrau
:return:
offset_map (offset map),
mask (mask of bad pixels),
gain_map (map of relative gain factors),
db_module (name of DB module),
when (dictionary: constant - creation time)
"""
when = dict()
const_data = dict()
if const_yaml:
for cname, mdata in const_yaml[karabo_da]["constants"].items():
const_data[cname] = dict()
when[cname] = mdata["creation-time"]
if when[cname]:
with h5py.File(mdata["file-path"], "r") as cf:
const_data[cname] = np.copy(
cf[f"{mdata['dataset-name']}/data"])
else:
const_data[cname] = empty_constants[cname]
else:
retrieval_function = partial(
get_constant_from_db_and_time,
karabo_id=karabo_id,
karabo_da=karabo_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
for cname, cempty in empty_constants.items():
const_data[cname], when[cname] = retrieval_function(
condition=condition,
constant=getattr(Constants.jungfrau, cname)(),
empty_constant=cempty,
)
offset_map = const_data["Offset"]
mask = const_data["BadPixelsDark"]
gain_map = const_data["RelativeGain"]
mask_ff = const_data["BadPixelsFF"]
# Combine masks
if mask_ff is not None:
mask |= np.moveaxis(mask_ff, 0, 1)
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
mask = np.moveaxis(mask, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
# masking double size pixels
mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE
if gain_map is not None:
if memory_cells > 1:
gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
# add extra empty cell constant
b = np.ones(((1,)+gain_map.shape[1:]))
gain_map = np.concatenate((gain_map, b), axis=0)
else:
gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)
return offset_map, mask, gain_map, karabo_da, when
with multiprocessing.Pool() as pool:
r = pool.map(get_constants_for_module, karabo_da)
# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
print(f'Constants for module {k_da}:')
for const in when:
print(f' {const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
relative_gain = False
constants[k_da] = (offset_map, mask, gain_map)
```
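%% Cell type:markdown id: tags:
A quick shape check of the axis reordering performed above (illustrative): constants are stored as `[x, y, cell, gain]` and moved to `[cell, x, y, gain]` in burst mode so that they can be indexed by memory cell first.
%% Cell type:code id: tags:
``` python
import numpy as np

offset_map = np.zeros((512, 1024, 16, 3))            # [x, y, cell, gain]
reordered = np.moveaxis(offset_map, [0, 1], [1, 2])
print(reordered.shape)                               # (16, 512, 1024, 3): [cell, x, y, gain]
```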
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
d = d.astype(np.float32) # [cells, x, y]
g = gain[index]
# Copy gain over first to keep it at the original 3 for low gain.
if strixel_sensor:
to_strixel(g, out=gain_corr[index, ...])
else:
gain_corr[index, ...] = g
# Jungfrau gains 0[00], 1[01], 3[11]
# Change low gain to 2 for indexing purposes.
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though it is correct to assume that memory cells pattern
can be the same across all trains (for one correction run
taken with one acquisition), it is preferred to not assume
this to account for exceptions that can happen.
"""
m = memcells[index].copy()
# 255 is a cell value pointing to no cell image data (image of 0 pixels).
# Corresponding image will be corrected with constant of cell 0. To avoid values of 0.
# This line is depending on not storing the modified memory cells in the corrected data.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
mask_cell = mask[m, ...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
d -= offset
# Gain correction
if relative_gain:
if memory_cells > 1:
gain_map_cell = gain_map[m, ...]
else:
gain_map_cell = gain_map
cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
d /= cal
msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))
if strixel_sensor:
to_strixel(d, out=data_corr[index, ...])
data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
to_strixel(msk, out=mask_corr[index, ...])
else:
data_corr[index, ...] = d
mask_corr[index, ...] = msk
```
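%% Cell type:markdown id: tags:
The per-pixel selection of constants in `correct_train` relies on `np.choose`: the gain stage `g` (0, 1 or 2 per pixel) picks the matching entry from the trailing gain axis, which is first moved to the front. A minimal standalone sketch:
%% Cell type:code id: tags:
``` python
import numpy as np

g = np.array([[0, 1], [2, 0]])                    # gain stage per pixel [x, y]
offset_map_cell = np.arange(12).reshape(2, 2, 3)  # constants [x, y, gain]
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
print(offset)  # offset[x, y] == offset_map_cell[x, y, g[x, y]]
```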
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
"""If ROIs are defined for this karabo_da, reduce them and save to the output file"""
rois_defined = 0
module_no = int(karabo_da[-2:])
params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
rois_source = f'{params_source}:output'
if roi_definitions != [-1]:
# Create Instrument and Control sections to later add datasets.
outp_source = ofile.create_instrument_source(rois_source)
ctrl_source = ofile.create_control_source(params_source)
for i in range(len(roi_definitions) // 6):
roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
if roi_module == module_no:
rois_defined += 1
# Apply the mask and average remaining pixels to 1D
roi_data = data_corr[..., a1:a2, b1:b2].mean(
axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
)
# Add roi corrected datasets
outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)
# Add roi run control datasets.
ctrl_source.create_run_key(f'roi{rois_defined}.region', np.array([[a1, a2, b1, b2]]))
ctrl_source.create_run_key(f'roi{rois_defined}.reduce_axis', np.array([mean_axis]))
if rois_defined:
# Copy the index for the new source
# Create count/first datasets at INDEX source.
ofile.copy(f'INDEX/{karabo_id}/DET/{karabo_da}:daqOutput/data',
f'INDEX/{rois_source}/data')
ntrains = ofile['INDEX/trainId'].shape[0]
ctrl_source.create_index(ntrains)
```
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
# Loop over modules
empty_seq = 0
for local_karabo_da, mapped_files_module in mapped_files.items():
instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
for sequence_file in mapped_files_module:
# Save corrected data in an output file with name
# of corresponding raw sequence file.
ofile_name = sequence_file.name.replace("RAW", "CORR")
out_file = out_folder / ofile_name
        # Load the sequence file data collection and data.adc keydata,
        # the data shape (to later create arrays of the same shape),
        # and the number of available trains to correct.
seq_dc = H5File(sequence_file)
seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
ishape = seq_dc_adc.shape # input shape.
corr_ntrains = ishape[0] # number of available trains to correct.
all_train_ids = seq_dc_adc.train_ids
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {sequence_file.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
"trains with missing data.")
        # For testing, limit the number of corrected trains to get output faster.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")
# Load constants from the constants dictionary.
# These arrays are used by `correct_train()` function
offset_map, mask, gain_map = constants[local_karabo_da]
# Determine total output shape.
if strixel_sensor:
oshape = (*ishape[:-2], *strixel_frame_shape)
else:
oshape = ishape
# Allocate shared arrays for corrected data. Used in `correct_train()`
data_corr = context.alloc(shape=oshape, dtype=np.float32)
gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
mask_corr = context.alloc(shape=oshape, dtype=np.uint32)
step_timer.start()
        # Overwrite seq_dc after eliminating empty trains and/or applying the train limit.
seq_dc = seq_dc.select(
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
        # Load raw images (adc), gain, memcells, and frame numbers.
data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()
# Validate that the selected cell id to preview is available in raw data.
if memory_cells > 1:
# For plotting, assuming that memory cells are sorted the same for all trains.
found_cells = memcells[0] == cell_id_preview
if any(found_cells):
cell_idx_preview = np.where(found_cells)[0][0]
else:
print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
f"Previewing cell `{memcells[0]}`.")
cell_idx_preview = 0
else:
cell_idx_preview = 0
# Correct data per train
context.map(correct_train, data)
step_timer.done_step(f"Correction time.")
step_timer.start()
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)
with DataFile(out_file, 'w') as outp_file:
# Create INDEX datasets.
outp_file.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create Instrument section to later add corrected datasets.
outp_source = outp_file.create_instrument_source(instrument_src_kda)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# RAW memoryCell and frameNumber are not corrected. But we are storing only
# the values for the corrected trains.
outp_source.create_key(
"data.memoryCell", data=memcells,
chunks=(min(chunks_ids, memcells.shape[0]), 1))
outp_source.create_key(
"data.frameNumber", data=frame_number,
chunks=(min(chunks_ids, frame_number.shape[0]), 1))
            # Add the main corrected `data.adc` dataset and store corrected data.
outp_source.create_key(
"data.adc", data=data_corr,
chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
outp_source.create_compressed_key(
"data.gain", data=gain_corr)
outp_source.create_compressed_key(
"data.mask", data=mask_corr)
# Temporary hotfix for FXE assuming this dataset is in corrected files.
outp_source.create_key(
"data.trainId", data=seq_dc.train_ids,
chunks=(min(50, len(seq_dc.train_ids))))
save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)
            # Create METADATA datasets
outp_file.create_metadata(like=seq_dc)
step_timer.done_step(f'Saving data time.')
if empty_seq == sum([len(i) for i in mapped_files.values()]):
warning("No valid trains for RAW data to correct.")
sys.exit(0)
```
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
# Positions are given in pixels
mod_width = (256 * 4) + (2 * 3) # inc. 2px gaps between tiles
mod_height = (256 * 2) + 2
if karabo_id == "SPB_IRDA_JF4M":
# The first 4 modules are rotated 180 degrees relative to the others.
# We pass the bottom, beam-right corner of the module regardless of its
# orientation, requiring a subtraction from the symmetric positions we'd
# otherwise calculate.
x_start, y_start = 1125, 1078
module_pos = [
(x_start - mod_width, y_start - mod_height - (i * (mod_height + 33)))
for i in range(4)
] + [
(-x_start, -y_start + (i * (mod_height + 33))) for i in range(4)
]
orientations = [(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif karabo_id == "FXE_XAD_JF1M":
module_pos = ((-mod_width//2, 33),(-mod_width//2, -mod_height -33))
orientations = [(-1,-1), (1,1)]
else:
module_pos = ((-mod_width//2,-mod_height//2),)
orientations = None
geom = JUNGFRAUGeometry.from_module_positions(module_pos, orientations=orientations, asic_gap=0)
```
%% Cell type:code id: tags:
``` python
first_seq = 0 if sequences == [-1] else sequences[0]
with RunDirectory(out_folder, f"*{run}*S{first_seq:05d}*") as corr_dc:
# Reading CORR data for plotting.
jf_corr = components.JUNGFRAU(
corr_dc,
detector_name=karabo_id,
).select_trains(np.s_[:plot_trains])
tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))
# Shape = [modules, trains, cells, x, y]
    # TODO: Fix the case where not all modules were requested to be corrected.
    # For example, if only one module was corrected, an assertion error is expected
    # at `geom.plot_data_fast` while plotting corrected images.
corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...].values
corrected_train = jf_corr_data["data.adc"][
:, cell_idx_preview, ...
    ].values  # lose the train axis.
mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...].values
mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...].values
with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*") as raw_dc:
with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*", _use_voview=False) as raw_dc:
# Reading RAW data for plotting.
jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(
np.s_[:plot_trains]
)
raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...].values
raw_train = (
jf_raw.select_trains(by_id[[tid]])
.get_array("data.adc")[:, 0, cell_idx_preview, ...]
.values
)
gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...].values
gain_train_cells = (
jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...].values
)
```
%% Cell type:code id: tags:
``` python
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time,
)
```
%% Cell type:markdown id: tags:
### Mean RAW Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
raw_mean = np.mean(raw, axis=1)
geom.plot_data_fast(
raw_mean,
ax=ax,
vmin=min(0.75*np.median(raw_mean[raw_mean > 0]), 2000),
vmax=max(1.5*np.median(raw_mean[raw_mean > 0]), 16000),
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
ax.set_title(f'{karabo_id} - Mean RAW', size=18)
plt.show()
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
corrected_mean = np.mean(corrected, axis=1)
_corrected_vmin = min(0.75*np.median(corrected_mean[corrected_mean > 0]), -0.5)
_corrected_vmax = max(2.*np.median(corrected_mean[corrected_mean > 0]), 100)
mean_plot_kwargs = dict(
vmin=_corrected_vmin, vmax=_corrected_vmax, cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(figsize=(18, 10))
corrected_masked = corrected.copy()
corrected_masked[mask != 0] = np.nan
corrected_masked_mean = np.nanmean(corrected_masked, axis=1)
del corrected_masked
if not strixel_sensor:
geom.plot_data_fast(
corrected_masked_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### A single image from train {tid}")))
fig, ax = plt.subplots(figsize=(18, 10))
single_plot_kwargs = dict(
vmin=min(0.75 * np.median(corrected_train[corrected_train > 0]), -0.5),
vmax=max(2.0 * np.median(corrected_train[corrected_train > 0]), 100),
cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_train,
ax=ax,
colorbar={"shrink": 1, "pad": 0.01},
**single_plot_kwargs
)
else:
ax.imshow(corrected_train.squeeze(), aspect=10, **single_plot_kwargs)
ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [
np.min(edges[1]),
np.max(edges[1]),
np.min(edges[0]),
np.max(edges[0]),
]
im = ax.imshow(
data[::-1, :],
extent=extent,
aspect="auto",
norm=LogNorm(vmin=1, vmax=np.max(data))
)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:markdown id: tags:
### Gain Bit Value
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
h, ex, ey = np.histogram2d(
raw[i].flatten(),
gain[i].flatten(),
bins=[100, 4],
range=[[0, 10000], [0, 4]],
)
do_2d_plot(
h,
(ex, ey),
"Signal (ADU)",
"Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
f"Module {mod} ({pdu})",
)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
corrected_flatten = corrected[i].flatten()
for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):
h = ax.hist(
corrected_flatten,
bins=1000,
range=hist_range,
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))
fig, ax = plt.subplots(figsize=(18, 10))
gain_max = np.max(gain_train_cells, axis=(1, 2))
geom.plot_data_fast(
gain_max,
ax=ax,
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
plt.show()
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append(
(item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
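%% Cell type:markdown id: tags:
A short illustration of how these bit masks combine and are tested, using only flags that appear in this notebook (`NON_STANDARD_SIZE` and `WRONG_GAIN_VALUE`):
%% Cell type:code id: tags:
``` python
from cal_tools.enums import BadPixels

# A mask entry is the bitwise OR of all bad pixel types that apply.
entry = BadPixels.NON_STANDARD_SIZE.value | BadPixels.WRONG_GAIN_VALUE.value
print(f"{entry:016b}")
# Test for a specific type by AND-ing with its bit.
print(bool(entry & BadPixels.WRONG_GAIN_VALUE.value))  # True
```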
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
display(Markdown(f"#### Bad pixels image for train {tid}"))
fig, ax = plt.subplots(figsize=(18, 10))
if not strixel_sensor:
geom.plot_data_fast(
np.log2(mask_train),
ax=ax,
vmin=0, vmax=32, cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
else:
ax.imshow(np.log2(mask_train).squeeze(), vmin=0, vmax=32, cmap='jet', aspect=10)
plt.show()
```
......
%% Cell type:markdown id: tags:
# Jungfrau Dark Image Characterization #
Author: European XFEL Detector Group, Version: 2.0
Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/' # folder under which runs are located, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # path to place reports at, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
run_high = 141 # run number for G0 dark run, required
run_med = 142 # run number for G1 dark run, required
run_low = 143 # run number for G2 dark run, required
# Parameters used to access raw data.
karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules
karabo_id = 'SPB_IRDA_JF4M' # karabo_id (detector identifier) prefix of Jungfrau detector to process.
karabo_id_control = '' # Set this if control is on a different karabo ID; leave empty if it is the same as karabo-id
receiver_template = 'JNGFR{:02}' # inset for receiver devices
instrument_source_template = '{}/DET/{}:daqOutput' # template for instrument source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
# Parameters for calibration database and storing constants.
use_dir_creation_date = True # use dir creation date
cal_db_interface = 'tcp://max-exfl016:8016#8045' # calibrate db interface to connect to
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
# Parameters affecting creating dark calibration constants.
badpixel_threshold_sigma = 5. # bad pixels defined by values outside n times this std from median
offset_abs_threshold_low = [1000, 10000, 10000] # absolute bad pixel threshold in terms of offset, lower values
offset_abs_threshold_high = [8000, 15000, 15000] # absolute bad pixel threshold in terms of offset, upper values
max_trains = 1000 # Maximum trains to process darks. Set to 0 to process all available train images. 1000 trains is enough resolution to create the dark constants
min_trains = 100 # Minimum number of trains to process dark constants. Raise a warning if the run has fewer trains.
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
time_limits = 0.025 # to find calibration constants later on, the integration time is allowed to vary by 0.5 us
# Parameters to be used for injecting dark calibration constants.
integration_time = 1000 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixgain1, fixgain2. Will be overwritten by value in file
gain_mode = 0 # 1 if medium and low runs are fixgain1 and fixgain2, otherwise 0. It will be overwritten by the value in the file unless manual_slow_data is set to True.
bias_voltage = 90 # sensor bias voltage in V, will be overwritten by value in file
memory_cells = 16 # number of memory cells
# Parameters used for plotting
detailed_report = False
# TODO: this is used for only Warning check at AGIPD dark.
# Need to rethink if it makes sense to use it here as well.
operation_mode = 'ADAPTIVE_GAIN' # Detector operation mode, optional
```
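%% Cell type:markdown id: tags:
A hedged sketch of the two bad pixel criteria configured above, as they would apply to an offset map (illustrative only; the actual evaluation in this notebook may differ): values outside `badpixel_threshold_sigma` standard deviations from the median, or outside the absolute offset thresholds.
%% Cell type:code id: tags:
``` python
import numpy as np

abs_low, abs_high = offset_abs_threshold_low[0], offset_abs_threshold_high[0]  # high gain

offsets = np.random.normal(4000, 50, (512, 1024))  # dummy offset map
med, std = np.median(offsets), np.std(offsets)
bad = np.abs(offsets - med) > badpixel_threshold_sigma * std
bad |= (offsets < abs_low) | (offsets > abs_high)
print(bad.sum(), "pixels flagged")
```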
%% Cell type:code id: tags:
``` python
import glob
import os
import warnings
from pathlib import Path
from logging import warning
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import pasha as psh
import yaml
from IPython.display import Markdown, display
from extra_data import RunDirectory
matplotlib.use('agg')
%matplotlib inline
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.histogram import histPlot
from cal_tools import jungfraulib, step_timing
from cal_tools.ana_tools import save_dict_to_hdf5
from cal_tools.enums import BadPixels, JungfrauGainMode
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
# Constants relevant for the analysis
run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
sensor_size = (1024, 512)
gains = [0, 1, 2]
fixed_settings = [
JungfrauGainMode.FIX_GAIN_1.value, JungfrauGainMode.FIX_GAIN_2.value]
dynamic_settings = [
JungfrauGainMode.FORCE_SWITCH_HG1.value, JungfrauGainMode.FORCE_SWITCH_HG2.value]
old_fixed_settings = ["fixgain1", "fixgain2"]
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time")
os.makedirs(out_folder, exist_ok=True)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = f"proposal:{proposal} runs:{run_high} {run_med} {run_low}"
report = get_report(metadata_folder)
step_timer = step_timing.StepTimer()
```
%% Cell type:markdown id: tags:
## Reading control data
%% Cell type:code id: tags:
``` python
step_timer.start()
gain_runs = dict()
med_low_settings = []
ctrl_src = ctrl_source_template.format(karabo_id_control)
for gain, run_n in enumerate(run_nums):
run_dc = RunDirectory(f"{in_folder}/r{run_n:04d}/")
gain_runs[run_n] = [gain, run_dc]
ctrl_data = jungfraulib.JungfrauCtrl(run_dc, ctrl_src)
# Read control data for the high gain run only.
if run_n == run_high:
run_mcells, sc_start = ctrl_data.get_memory_cells()
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
print(f"Gain setting is {gain_setting} ({ctrl_data.run_settings})")
print(f"Integration time is {integration_time} us")
print(f"Bias voltage is {bias_voltage} V")
if run_mcells == 1:
memory_cells = 1
print('Dark runs in single cell mode, '
f'storage cell start: {sc_start:02d}')
else:
memory_cells = 16
print('Dark runs in burst mode, '
f'storage cell start: {sc_start:02d}')
else:
gain_mode = ctrl_data.get_gain_mode()
med_low_settings.append(ctrl_data.run_mode)
# A transparent workaround for old raw data with wrong/missing medium and low settings
if med_low_settings == [None, None]:
    warning("run.settings is not stored in the data to read. "
            f"Hence assuming gain_mode = {gain_mode} for adaptive old data.")
elif med_low_settings == ["dynamicgain", "forceswitchg1"]:
    warning(f"run.settings for medium and low gain runs are wrong {med_low_settings}. "
            f"This is an expected bug for old raw data. Setting gain_mode to {gain_mode}.")
# Validate that low_med_settings is not a mix of adaptive and fixed settings.
elif not (sorted(med_low_settings) in [fixed_settings, dynamic_settings, old_fixed_settings]): # noqa
raise ValueError(
"Medium and low run settings are not as expected. "
f"Either {dynamic_settings}, {fixed_settings}, or {old_fixed_settings} are expected.\n"
f"Got {sorted(med_low_settings)} for both runs, respectively.")
print(f"Gain mode is {gain_mode} ({med_low_settings})")
step_timer.done_step(f'Reading control data.')
```
%% Cell type:code id: tags:
``` python
# set the operating condition
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
mod_x_const = [(mod, const) for const in ["Offset", "Noise", "BadPixelsDark"] for mod in karabo_da]
from cal_tools.tools import get_from_db
from datetime import timedelta
def retrieve_old_constant(mod, const):
dconst = getattr(Constants.jungfrau, const)()
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=mod,
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=60) if creation_time else None,
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, mod_x_const
)
old_retrieval_pool.close()
```
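%% Cell type:markdown id: tags:
The retrieval above runs asynchronously; `starmap_async` returns an `AsyncResult` whose values are collected later. A minimal sketch of the standard collection pattern (not necessarily the exact code used further on in this notebook):
%% Cell type:code id: tags:
``` python
# Block until all workers finish, then unpack per-(module, constant) results,
# which arrive in the same order as mod_x_const.
results = old_retrieval_res.get()
for (mod, const), (data, timestamp, filepath, h5path) in zip(mod_x_const, results):
    print(f"{mod} {const}: retrieved constant from {timestamp}")
```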
%% Cell type:code id: tags:
``` python
# Use only high gain threshold for all gains in case of fixed_gain.
if gain_mode: # fixed_gain
offset_abs_threshold = [[offset_abs_threshold_low[0]]*3, [offset_abs_threshold_high[0]]*3]
else:
offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]
```
%% Cell type:code id: tags:
``` python
context = psh.context.ThreadContext(num_workers=memory_cells)
```
%% Cell type:code id: tags:
``` python
"""
All jungfrau runs are taken through one acquisition, except for the forceswitch runs.
While taking non-fixed dark runs, a procedure of multiple acquisitions is used to switch the storage cell indices.
This is done for medium and low gain dark dynamic runs, only [forceswitchg1, forceswitchg2]:
Switching the cell indices in burst mode is a work around for hardware procedure
deficiency that produces wrong data for dark runs except for the first storage cell.
This is why multiple acquisitions are taken to switch the used storage cells and
acquire data through two cells for each of the 16 cells instead of acquiring darks through all 16 cells.
"""
print(f"Maximum trains to process is set to {max_trains}")
noise_map = dict()
offset_map = dict()
bad_pixels_map = dict()
for mod in karabo_da:
step_timer.start()
instrument_src = instrument_source_template.format(
karabo_id, receiver_template.format(int(mod[-2:])))
print(f"\n- Instrument data path for {mod} is {instrument_src}.")
offset_map[mod] = context.alloc(shape=(sensor_size+(memory_cells, 3)), fill=0)
offset_map[mod] = context.alloc(
shape=(sensor_size+(memory_cells, 3)), fill=0, dtype=np.float32)
noise_map[mod] = context.alloc(like=offset_map[mod], fill=0)
bad_pixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32, fill=0)
for run_n, [gain, run_dc] in gain_runs.items():
def process_cell(worker_id, array_index, cell_number):
cell_slice_idx = acelltable == cell_number
thiscell = images[..., cell_slice_idx]
thiscell = images[..., cell_slice_idx] # [1024, 512, n_trains]
# Identify cells/trains with images of 0 pixels.
# TODO: An investigation is ongoing by DET to identify reason for these empty images.
nonzero_adc = np.any(thiscell != 0 , axis=(0, 1))
nonzero_adc = np.any(thiscell != 0 , axis=(0, 1)) # [n_trains]
# Exclude empty images with 0 pixels, before calculating offset and noise
thiscell = thiscell[..., nonzero_adc]
offset_map[mod][..., cell_number, gain] = np.mean(thiscell, axis=2)
noise_map[mod][..., cell_number, gain] = np.std(thiscell, axis=2)
offset_map[mod][..., cell_number, gain] = np.mean( # [1024, 512]
thiscell, axis=2, dtype=np.float32)
noise_map[mod][..., cell_number, gain] = np.std( # [1024, 512]
thiscell, axis=2, dtype=np.float32)
del thiscell
# Check if there are wrong bad gain values.
# 1. Exclude empty images.
# 2. Indicate pixels with wrong gain value for any train for each cell.
# TODO: mean is used to use thresholds for accepting gain values, even if not 0 mean value.
gain_avg = np.mean(
gain_vals[..., cell_slice_idx][..., nonzero_adc], axis=2)
gain_avg = np.mean( # [1024, 512]
gain_vals[..., cell_slice_idx][..., nonzero_adc],
axis=2, dtype=np.float32
)
# [1024, 512]
bad_pixels_map[mod][..., cell_number, gain][gain_avg != raw_g] |= BadPixels.WRONG_GAIN_VALUE.value
print(f"Gain stage {gain}, run {run_n}")
        # Load the number of trains with actual image data for this module.
n_trains = run_dc[instrument_src, "data.adc"].shape[0]
# load number of data available, including trains with empty data.
all_trains = len(run_dc.train_ids)
instr_dc = run_dc.select(instrument_src, require_all=True)
empty_trains = all_trains - n_trains
if empty_trains != 0:
print(f"\tWARNING: {mod} has {empty_trains} trains with empty data out of {n_trains} trains") # noqa
print(f"{mod} has {empty_trains} empty trains out of {all_trains} trains")
        if max_trains > 0:
            n_trains = min(n_trains, max_trains)
        print(f"Processing {n_trains} images.")
if n_trains == 0:
raise ValueError(f"{run_n} has no trains to process.")
if n_trains < min_trains:
warning(f"Less than {min_trains} trains are available in RAW data.")
# Select only requested number of images to process darks.
instr_dc = instr_dc.select_trains(np.s_[:n_trains])
images = np.transpose(
instr_dc[instrument_src, "data.adc"].ndarray(), (3, 2, 1, 0))
acelltable = np.transpose(instr_dc[instrument_src, "data.memoryCell"].ndarray())
gain_vals = np.transpose(
instr_dc[instrument_src, "data.gain"].ndarray(), (3, 2, 1, 0))
# define gain value as saved in raw gain map
raw_g = 3 if gain == 2 else gain
if memory_cells == 1:
acelltable -= sc_start
# Only for dynamic medium and low gain runs [forceswitchg1, forceswitchg2] in burst mode.
if gain_mode == 0 and gain > 0 and memory_cells == 16:
            # Mark all but the first cell with 255, the value the receiver uses
            # to indicate a cell without an image. The image shape for
            # forceswitchg1/forceswitchg2 is (1024, 512, 2, trains), compared to
            # the expected (1024, 512, 16, trains) for a high gain run.
acelltable[1:] = 255
# Calculate offset and noise maps
context.map(process_cell, range(memory_cells))
del images
del acelltable
del gain_vals
    step_timer.done_step('Creating offset and noise constants for a module.')
```
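%% Cell type:markdown id: tags:
The empty-image exclusion above matters for the statistics: frames that are entirely zero would drag the per-cell mean towards zero and inflate the standard deviation. A minimal sketch with toy data (not detector output):
%% Cell type:code id: tags:
``` python
import numpy as np

# Toy stack of five 2x2 frames, two of which are completely empty.
frames = np.stack([np.full((2, 2), v) for v in [10., 0., 12., 0., 11.]], axis=-1)

nonzero = np.any(frames != 0, axis=(0, 1))  # [n_frames] -> [True, False, True, False, True]
good = frames[..., nonzero]

print(np.mean(frames, axis=-1)[0, 0])  # 6.6 - biased towards zero by empty frames
print(np.mean(good, axis=-1)[0, 0])    # 11.0 - offset from real frames only
print(np.std(good, axis=-1)[0, 0])     # ~0.82
```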
%% Cell type:code id: tags:
``` python
if detailed_report:
display(Markdown("## Offset and Noise Maps:"))
display(Markdown(
"Below offset and noise maps for the high ($g_0$) gain stage are shown, "
"alongside the distribution of these values. One expects block-like "
"structures mapping to the ASICs of the detector"))
g_name = ['G0', 'G1', 'G2']
g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
n_range = [(0., 50.), (0., 50.), (0., 50.)]
unit = '[ADCu]'
    # TODO: Fix plots arrangement and speed for Jungfrau burst mode.
step_timer.start()
for pdu, mod in zip(db_modules, karabo_da):
for g_idx in gains:
for cell in range(0, memory_cells):
f_o0 = heatmapPlot(
np.swapaxes(offset_map[mod][..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label=unit,
aspect=1.,
vmin=g_range[g_idx][0],
vmax=g_range[g_idx][1],
title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})')
fo0, ax_o0 = plt.subplots()
res_o0 = histPlot(
ax_o0, offset_map[mod][..., cell, g_idx],
bins=800,
range=g_range[g_idx],
facecolor='b',
histotype='stepfilled',
)
ax_o0.tick_params(axis='both',which='major',labelsize=15)
ax_o0.set_title(
f'Module pedestal distribution - Cell {cell:02d} - Module {mod} ({pdu})',
fontsize=15)
ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
ax_o0.set_yscale('log')
f_n0 = heatmapPlot(
np.swapaxes(noise_map[mod][..., cell, g_idx], 0, 1),
y_label="Row",
x_label="Column",
lut_label= unit,
aspect=1.,
vmin=n_range[g_idx][0],
vmax=n_range[g_idx][1],
title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})",
)
fn0, ax_n0 = plt.subplots()
res_n0 = histPlot(
ax_n0,
noise_map[mod][..., cell, g_idx],
bins=100,
range=n_range[g_idx],
facecolor='b',
histotype='stepfilled',
)
ax_n0.tick_params(axis='both', which='major', labelsize=15)
ax_n0.set_title(
f'Module noise distribution - Cell {cell:02d} - Module {mod} ({pdu})',
fontsize=15)
ax_n0.set_xlabel(
f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
plt.show()
    step_timer.done_step('Plotting offset and noise maps.')
```
%% Cell type:markdown id: tags:
## Bad Pixel Map ##
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:
$$
v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
$$
or
$$
v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
$$
Values are encoded in a 32-bit mask, where the following non-zero entries are relevant for bad pixels deduced from dark images:
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
print("{:<30s} {:032b} -> {}".format(bp.name, bp.value, int(bp.value)))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
print_bp_entry(BadPixels.WRONG_GAIN_VALUE)
def eval_bpidx(d):
mdn = np.nanmedian(d, axis=(0, 1))[None, None, :, :]
std = np.nanstd(d, axis=(0, 1))[None, None, :, :]
idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)
return idx
```
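%% Cell type:markdown id: tags:
Since several of these flags can be set on the same pixel, a mask value is read back by testing each bit individually. A short sketch of decoding a combined mask value:
%% Cell type:code id: tags:
``` python
from cal_tools.enums import BadPixels  # already imported earlier in this notebook

# A pixel carrying two flags at once: the mask is the bitwise OR of the flag values.
combined = (BadPixels.OFFSET_OUT_OF_THRESHOLD.value
            | BadPixels.WRONG_GAIN_VALUE.value)

for bp in (BadPixels.OFFSET_OUT_OF_THRESHOLD,
           BadPixels.NOISE_OUT_OF_THRESHOLD,
           BadPixels.OFFSET_NOISE_EVAL_ERROR,
           BadPixels.WRONG_GAIN_VALUE):
    if combined & bp.value:
        print(f"{bp.name} is set")
```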
%% Cell type:code id: tags:
``` python
step_timer.start()
for pdu, mod in zip(db_modules, karabo_da):
display(Markdown(f"### Badpixels for module {mod} ({pdu}):"))
offset_abs_threshold = np.array(offset_abs_threshold)
bad_pixels_map[mod][eval_bpidx(offset_map[mod])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bad_pixels_map[mod][~np.isfinite(offset_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
bad_pixels_map[mod][eval_bpidx(noise_map[mod])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bad_pixels_map[mod][~np.isfinite(noise_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
bad_pixels_map[mod][(offset_map[mod] < offset_abs_threshold[0][None, None, None, :]) | (offset_map[mod] > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value # noqa
if detailed_report:
for g_idx in gains:
for cell in range(memory_cells):
bad_pixels = bad_pixels_map[mod][:, :, cell, g_idx]
fn_0 = heatmapPlot(
np.swapaxes(bad_pixels, 0, 1),
y_label="Row",
x_label="Column",
lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
aspect=1.,
vmin=0, vmax=5,
title=f'G{g_idx} Bad pixel map - Cell {cell:02d} - Module {mod} ({pdu})')
step_timer.done_step('Creating bad pixels constant.')
```
%% Cell type:markdown id: tags:
## Inject and save calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
for mod, db_mod in zip(karabo_da, db_modules):
constants = {
'Offset': np.moveaxis(offset_map[mod], 0, 1),
'Noise': np.moveaxis(noise_map[mod], 0, 1),
'BadPixelsDark': np.moveaxis(bad_pixels_map[mod], 0, 1),
}
md = None
for key, const_data in constants.items():
const = getattr(Constants.jungfrau, key)()
const.data = const_data
for parm in condition.parameters:
if parm.name == "Integration Time":
parm.lower_deviation = time_limits
parm.upper_deviation = time_limits
if db_output:
md = send_to_db(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
)
if local_output:
md = save_const_to_h5(
db_module=db_mod,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder,
)
print(f"Calibration constant {key} is stored locally at {out_folder}.\n")
print("Constants parameter conditions are:\n")
print(
f"• Bias voltage: {bias_voltage}\n"
f"• Memory cells: {memory_cells}\n"
f"• Integration time: {integration_time}\n"
f"• Gain setting: {gain_setting}\n"
f"• Gain mode: {gain_mode}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n") # noqa
step_timer.done_step("Injecting constants.")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (mod, const), (data, timestamp, filepath, h5path) in zip(
mod_x_const, old_retrieval_res.get()):
old_const.setdefault(mod, {})[const] = data
old_mdata.setdefault(mod, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path,
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("## The following pre-existing constants are used for comparison:"))
for mod, consts in old_mdata.items():
pdu = db_modules[karabo_da.index(mod)]
display(Markdown(f"- {mod} ({pdu})"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{metadata_folder or out_folder}/module_metadata_{mod}.yml", "w") as fd:
yaml.safe_dump(
{
"module": mod,
"pdu": pdu,
"old-constants": old_mdata[mod],
},
fd,
)
```
%% Cell type:markdown id: tags:
# LPD Offset, Noise and Dead Pixels Characterization #
Author: M. Karnevskiy, S. Hauf
This notebook characterizes dark images to derive offset, noise and bad-pixel maps. All three types of constants are evaluated per pixel and per memory cell.
The notebook will correctly handle veto settings, but note that if you veto cells you will not be able to use these offsets for runs with different veto settings - vetoed cells will have zero offset.
The evaluated calibration constants are stored locally and injected in the calibration data base.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202030/p900121/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/LPD/" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequence = 0 # sequence files to evaluate
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 120 # run number in which high gain data was recorded, required
run_med = 121 # run number in which medium gain data was recorded, required
run_low = 122 # run number in which low gain data was recorded, required
karabo_id = "FXE_DET_LPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
use_dir_creation_date = True # use the creation date of the directory for database time derivation
cal_db_interface = "tcp://max-exfl016:8015#8025" # the database interface to use
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
capacitor_setting = 5 # capacitor_setting for which data was taken
mem_cells = 512 # number of memory cells used
bias_voltage = 250 # detector bias voltage
thresholds_offset_sigma = 3. # bad pixel relative threshold in terms of n sigma offset
thresholds_offset_hard = [400, 1500] # bad pixel hard threshold
thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise
thresholds_noise_hard = [1, 35] # bad pixel hard threshold
skip_first_ntrains = 10 # Number of first trains to skip
# Parameters for plotting
skip_plots = False # exit after writing corrected files
instrument = "FXE" # instrument name
ntrains = 100 # number of trains to use
high_res_badpix_3d = False # plot bad-pixel summary in high resolution
test_for_normality = False # perform normality test
inject_cell_order = False # Include memory cell order as part of the detector condition
operation_mode = '' # Detector operation mode, optional
```
%% Cell type:code id: tags:
``` python
import copy
import multiprocessing
import os
import warnings
from collections import OrderedDict
from datetime import datetime
from functools import partial
from logging import warning
warnings.filterwarnings('ignore')
import dateutil.parser
import h5py
import matplotlib
import pasha as psh
import scipy.stats
from IPython.display import Latex, Markdown, display
matplotlib.use("agg")
import matplotlib.patches as patches
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from iCalibrationDB import Conditions, Constants, Detectors, Versions
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
from cal_tools.enums import BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_notebook_name,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
parse_runs,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
```
%% Cell type:code id: tags:
``` python
gains = np.arange(3)
max_cells = mem_cells
cells = np.arange(max_cells)
gain_names = ['High', 'Medium', 'Low']
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(16))
karabo_da = ['LPD{:02d}'.format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
gain_runs = OrderedDict()
if capacitor_setting == 5:
gain_runs["high_5pf"] = run_high
gain_runs["med_5pf"] = run_med
gain_runs["low_5pf"] = run_low
elif capacitor_setting == 50:
gain_runs["high_50pf"] = run_high
gain_runs["med_50pf"] = run_med
gain_runs["low_50pf"] = run_low
capacitor_settings = [capacitor_setting]
capacitor_settings = ['{}pf'.format(c) for c in capacitor_settings]
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print("Using {} as creation time".format(creation_time))
run, prop, seq = run_prop_seq_from_path(in_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
display(Markdown('## Evaluated parameters'))
print('CalDB Interface {}'.format(cal_db_interface))
print("Proposal: {}".format(prop))
print("Memory cells: {}/{}".format(mem_cells, max_cells))
print("Runs: {}, {}, {}".format(run_high, run_med, run_low))
print("Sequence: {}".format(sequence))
print("Using DB: {}".format(db_output))
print("Input: {}".format(in_folder))
print("Output: {}".format(out_folder))
print("Bias voltage: {}V".format(bias_voltage))
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
gmf = map_gain_stages(in_folder, gain_runs, path_template, karabo_da, [sequence])
gain_mapped_files, total_sequences, total_file_size = gmf
print(f"Will process a total of {total_sequences} files.")
```
%% Cell type:markdown id: tags:
## Data processing
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
# the actual characterization
def characterize_module(filename, channel, gg, cap):
def splitOffGainLPD(d):
msk = np.zeros(d.shape, np.uint16)
msk[...] = 0b0000111111111111
data = np.bitwise_and(d, msk)
msk[...] = 0b0011000000000000
gain = np.bitwise_and(d, msk)//4096
gain[gain > 2] = 2
return data, gain
infile = h5py.File(filename, "r")
instrument_src = h5path.format(channel)
index_src = h5path_idx.format(channel)
count = infile[f"{index_src}/count"][()]
first = infile[f"{index_src}/first"][()]
valid = count != 0
count, first = count[valid], first[valid]
first_image = int(first[skip_first_ntrains] if first.shape[0] > skip_first_ntrains else 0)
last_image = int(first_image + np.sum(count[skip_first_ntrains:skip_first_ntrains+ntrains]))
im = np.array(infile["{}/data".format(instrument_src, channel)][first_image:last_image, ...])
cellid = np.squeeze(np.array(infile["{}/cellId".format(instrument_src, channel)][first_image:last_image, ...]))
infile.close()
if im.shape[0] == 0: # No data
return None, None, channel, gg, cap, None, None, None, None
cellid_pattern = cellid[:count[0]]
im, g = splitOffGainLPD(im[:, 0, ...])
im = im.astype(np.float32)
im = np.rollaxis(im, 2)
im = np.rollaxis(im, 2, 1)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], max_cells), dtype=np.float64)
noise = context.alloc(like=offset)
normal_test = context.alloc(like=offset)
def process_cell(worker_id, array_index, cc):
idx = cellid == cc
im_slice = im[..., idx]
if np.any(idx):
offset[..., cc] = np.median(im_slice, axis=2)
noise[..., cc] = np.std(im_slice, axis=2)
if test_for_normality:
_, normal_test[..., cc] = scipy.stats.normaltest(
im[:, :, idx], axis=2)
context.map(process_cell, np.unique(cellid))
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0, 1))
offset_std = np.nanstd(offset, axis=(0, 1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (
offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0, 1))
noise_std = np.nanstd(noise, axis=(0, 1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (
noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
idx = (cellid == cellid[0])
return offset, noise, channel, gg, cap, bp, im[12, 12, idx], normal_test, cellid_pattern
```
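%% Cell type:markdown id: tags:
To make the bit layout used by `splitOffGainLPD` explicit: each 16-bit raw word carries the ADC value in its lower 12 bits and the gain stage in bits 12-13. A toy example on a synthetic word (not real detector data):
%% Cell type:code id: tags:
``` python
import numpy as np

# Synthetic raw word: gain stage 2 in bits 12-13, ADC value 1234 in bits 0-11.
word = np.uint16((2 << 12) | 1234)

adc = np.bitwise_and(word, 0b0000111111111111)           # lower 12 bits
gain = np.bitwise_and(word, 0b0011000000000000) // 4096  # bits 12-13 (4096 = 2**12)

print(adc, gain)  # 1234 2
```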
%% Cell type:code id: tags:
``` python
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
data_g = OrderedDict()
ntest_g = OrderedDict()
# Should be the same cell order for all modules & all gain stages
cellid_patterns_g = {}
gg = 0
old_cap = None
start = datetime.now()
inp = []
for gain, mapped_files in gain_mapped_files.items():
cap = gain.split("_")[1]
if cap != old_cap:
gg = 0
old_cap = cap
offset_g[cap] = OrderedDict()
noise_g[cap] = OrderedDict()
badpix_g[cap] = OrderedDict()
data_g[cap] = OrderedDict()
ntest_g[cap] = OrderedDict()
cellid_patterns_g[cap] = {}
for i in modules:
qm = module_index_to_qm(i)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = mapped_files[qm].get()
print("Process file: ", fname_in)
inp.append((fname_in, i, gg, cap))
gg+=1
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(characterize_module, inp)
for ir, r in enumerate(results):
offset, noise, i, gg, cap, bp, data, normal, cellid_pattern = r
if data is None:
warning(f"No data for module {i} of gain {gg}")
skip_plots = True
continue
qm = module_index_to_qm(i)
if qm not in offset_g[cap]:
offset_g[cap][qm] = np.zeros(
(offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[cap][qm] = np.zeros_like(offset_g[cap][qm])
badpix_g[cap][qm] = np.zeros_like(offset_g[cap][qm], dtype=np.uint32)
data_g[cap][qm] = np.full((ntrains, 3), np.nan)
ntest_g[cap][qm] = np.zeros_like(offset_g[cap][qm])
cellid_patterns_g[cap][qm] = cellid_pattern
else:
if not np.array_equal(cellid_pattern, cellid_patterns_g[cap][qm]):
raise ValueError("Inconsistent cell ID pattern between gain stages")
offset_g[cap][qm][..., gg] = offset
noise_g[cap][qm][..., gg] = noise
badpix_g[cap][qm][..., gg] = bp
data_g[cap][qm][:data.shape[0], gg] = data
ntest_g[cap][qm][..., gg] = normal
hn, cn = np.histogram(data, bins=20)
print(f"{gain_names[gg]} gain, Capacitor {cap}, Module: {qm}. "
f"Number of processed trains per cell: {data.shape[0]}.")
```
%% Cell type:code id: tags:
``` python
if skip_plots:
import sys
print('Skipping plots')
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
report = get_report(metadata_folder)
```
%% Cell type:code id: tags:
``` python
# TODO: add db_module when received from myMDC
# Create the modules dict of karabo_das and PDUs
qm_dict = OrderedDict()
for i, k_da in zip(modules, karabo_da):
qm = module_index_to_qm(i)
qm_dict[qm] = {"karabo_da": k_da,
"db_module": ""}
```
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise", "BadPixelsDark"]
old_const = {}
old_mdata = {}
dinstance = "LPD1M1"
detinst = getattr(Detectors, dinstance)
print('Retrieve pre-existing constants for comparison.')
for cap in capacitor_settings:
old_const[cap] = {}
old_mdata[cap] = {}
for qm in offset_g[cap].keys():
old_const[cap][qm] = {}
old_mdata[cap][qm] = {}
qm_db = qm_dict[qm]
karabo_da = qm_db["karabo_da"]
cellid_pattern = cellid_patterns_g[cap][qm]
if inject_cell_order:
mem_cell_order = ",".join([str(c) for c in cellid_pattern]) + ","
else:
mem_cell_order = None
condition = Conditions.Dark.LPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
capacitor=cap,
memory_cell_order=mem_cell_order,
)
for const in clist:
constant = getattr(Constants.LPD, const)()
if not qm_db["db_module"]:
# This should be used in case of running notebook
# by a different method other than myMDC which already
# sends CalCat info.
qm_db["db_module"] = get_pdu_from_db(karabo_id, [karabo_da], constant,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
data, mdata = get_from_db(karabo_id, karabo_da,
constant,
condition, None,
cal_db_interface,
creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[cap][qm][const] = data
if mdata is None or data is None:
old_mdata[cap][qm][const] = {
"timestamp": "Not found",
"filepath": None,
"h5path": None
}
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
old_mdata[cap][qm][const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
with open(f"{out_folder}/module_metadata_{qm}.yml","w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_db["db_module"],
"old-constants": old_mdata[cap][qm]
}, fd)
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for cap in capacitor_settings:
res[cap] = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[cap][qm] = {'Offset': offset_g[cap][qm],
'Noise': noise_g[cap][qm],
'BadPixelsDark': badpix_g[cap][qm]
}
```
%% Cell type:code id: tags:
``` python
# Save constants in the calibration DB
md = None
for cap in capacitor_settings:
for qm in res[cap]:
karabo_da = qm_dict[qm]["karabo_da"]
db_module = qm_dict[qm]["db_module"]
cellid_pattern = cellid_patterns_g[cap][qm]
if inject_cell_order:
mem_cell_order = ",".join([str(c) for c in cellid_pattern]) + ","
else:
mem_cell_order = None
# Do not store empty constants
# In case of 0 trains data_g is initiated with nans and never refilled.
if np.count_nonzero(~np.isnan(data_g[cap][qm]))==0:
print(f"Constant ({cap}, {qm}) would be empty, skipping saving")
continue
for const in res[cap][qm]:
dconst = getattr(Constants.LPD, const)()
dconst.data = res[cap][qm][const]
# set the operating condition
condition = Conditions.Dark.LPD(memory_cells=max_cells,
bias_voltage=bias_voltage,
capacitor=cap,
memory_cell_order=mem_cell_order,
)
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition,
file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition,
dconst.data, file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} is stored locally.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
print(f"• memory_cells: {max_cells}\n"
f"• bias_voltage: {bias_voltage}\n"
f"• capacitor: {cap}\n"
f"• memory cell order: {mem_cell_order}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
show_processed_modules(
dinstance=dinstance,
constants=None,
mnames=[module_index_to_qm(i) for i in modules],
mode="position"
)
```
%% Cell type:markdown id: tags:
## Raw pedestal distribution ##
Distribution of the pedestal (in ADU) over trains for pixel (12, 12), memory cell 12. The median of the distribution is shown in yellow, one standard deviation in red. The green line shows the average over all pixels for the given memory cell and gain stage.
%% Cell type:code id: tags:
``` python
fig, grid = plt.subplots(3, 1, sharex="col", sharey="row", figsize=(10, 7))
fig.subplots_adjust(wspace=0, hspace=0)
for cap in capacitor_settings:
for i in modules:
qm = module_index_to_qm(i)
if np.count_nonzero(~np.isnan(data_g[cap][qm])) == 0:
break
for gain in range(3):
data = data_g[cap][qm][:, gain]
offset = np.nanmedian(data)
noise = np.nanstd(data)
xrange = [np.nanmin(data_g[cap][qm]), np.nanmax(data_g[cap][qm])]
if xrange[1] == xrange[0]:
xrange = [0, xrange[0]+xrange[0]//2]
nbins = data_g[cap][qm].shape[0]
else:
nbins = int(xrange[1] - xrange[0])
hn, cn = np.histogram(data, bins=nbins, range=xrange)
grid[gain].hist(data, range=xrange, bins=nbins)
grid[gain].plot([offset-noise, offset-noise], [0, np.nanmax(hn)],
linewidth=1.5, color='red',
label='1 $\sigma$ deviation')
grid[gain].plot([offset+noise, offset+noise],
[0, np.nanmax(hn)], linewidth=1.5, color='red')
grid[gain].plot([offset, offset], [0, 0],
linewidth=1.5, color='y', label='median')
grid[gain].plot([np.nanmedian(offset_g[cap][qm][:, :, 12, gain]),
np.nanmedian(offset_g[cap][qm][:, :, 12, gain])],
[0, np.nanmax(hn)], linewidth=1.5, color='green',
label='average over pixels')
grid[gain].set_xlim(xrange)
grid[gain].set_ylim(0, np.nanmax(hn)*1.1)
grid[gain].set_xlabel("Offset value [ADU]")
grid[gain].set_ylabel("# of occurance")
if gain == 0:
leg = grid[gain].legend(
loc='upper center', ncol=3,
bbox_to_anchor=(0.1, 0.25, 0.7, 1.0))
grid[gain].text(820, np.nanmax(hn)*0.4,
"{} gain".format(gain_names[gain]), fontsize=20)
a = plt.axes([.125, .1, 0.775, .8], frame_on=False)
a.patch.set_alpha(0.05)
a.set_xlim(xrange)
plt.plot([offset, offset], [0, 1], linewidth=1.5, color='y')
plt.xticks([])
plt.yticks([])
ypos = 0.9
x1pos = (np.nanmedian(data_g[cap][qm][:, 0]) +
np.nanmedian(data_g[cap][qm][:, 2]))/2.
x2pos = (np.nanmedian(data_g[cap][qm][:, 2]) +
np.nanmedian(data_g[cap][qm][:, 1]))/2.-10
plt.annotate("", xy=(np.nanmedian(data_g[cap][qm][:, 0]), ypos), xycoords='data',
xytext=(np.nanmedian(data_g[cap][qm][:, 2]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_g[cap][qm][:, 0])-np.nanmedian(data_g[cap][qm][:, 2])),
xy=(x1pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.annotate("", xy=(np.nanmedian(data_g[cap][qm][:, 2]), ypos), xycoords='data',
xytext=(np.nanmedian(data_g[cap][qm][:, 1]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_g[cap][qm][:, 2])-np.nanmedian(data_g[cap][qm][:, 1])),
xy=(x2pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.show()
```
%% Cell type:markdown id: tags:
## Normality test ##
Distributions of raw pedestal values are tested for normality. A normality test has been performed for each pixel and each memory cell. The plots below show a histogram of p-values and a 2D distribution for memory cell 12.
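%% Cell type:markdown id: tags:
As a reminder of how to read these p-values, a standalone sketch with synthetic samples: `scipy.stats.normaltest` returns a small p-value when a sample deviates from a normal distribution, so pedestal distributions with p-values close to zero are suspicious.
%% Cell type:code id: tags:
``` python
import numpy as np
import scipy.stats

rng = np.random.default_rng(42)
gaussian = rng.normal(loc=1000, scale=10, size=500)  # well-behaved pedestal
skewed = rng.exponential(scale=10, size=500) + 1000  # clearly non-normal

_, p_gauss = scipy.stats.normaltest(gaussian)
_, p_skew = scipy.stats.normaltest(skewed)
print(f"p(gaussian) = {p_gauss:.3f}, p(skewed) = {p_skew:.2e}")
```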
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in capacitor_settings:
if not test_for_normality:
print('Normality test was not requested. Flag `test_for_normality` False')
break
for i in modules:
qm = module_index_to_qm(i)
data = np.copy(ntest_g[cap][qm][:,:,:,:])
data[badpix_g[cap][qm][:,:,:,:]>0] = 1.01
hn,cn = np.histogram(data[:,:,:,0], bins=100)
d = [{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,0], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'High gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,1], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Medium gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,2], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Low gain',
},
]
fig = plt.figure(figsize=(15,15), tight_layout={'pad': 0.5, 'w_pad': 0.3})
for gain in range(3):
ax = fig.add_subplot(221+gain)
heatmapPlot(data[:,:,12,gain], add_panels=False, cmap='viridis', figsize=(10,10),
y_label='Rows', x_label='Columns',
lut_label='p-Value',
use_axis=ax,
title='p-Value for cell 12, {} gain'.format(gain_names[gain]) )
ax = fig.add_subplot(224)
_ = simplePlot(d, #aspect=1.6,
x_label = "p-Value".format(gain),
y_label="# of occurance",
use_axis=ax,
y_log=False, legend='outside-top-ncol3-frame', legend_pad=0.05, legend_size='5%')
ax.ticklabel_format(style='sci', axis='y', scilimits=(4,6))
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews make it possible to identify potential effects common to all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:code id: tags:
``` python
cell = 12
for cap in capacitor_settings:
for gain in range(3):
display(
Markdown('### Cell-12 overview - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(18, 22) , tight_layout={'pad': 0.1, 'w_pad': 0.1})
for qm in res[cap]:
for iconst, const in enumerate(['Offset', 'Noise', 'BadPixelsDark']):
ax = fig.add_subplot(321+iconst)
data = res[cap][qm][const][:, :, 12, gain]
vmax = 1.5 * np.nanmedian(res[cap][qm][const][:, :, 12, gain])
label = '{} value [ADU]'.format(const)
title = '{} value'.format(const)
if const == 'BadPixelsDark':
vmax = 4
bpix_code = data.astype(np.float32)
bpix_code[bpix_code == 0] = np.nan
title = 'Bad pixel code'
label = title
cb_labels = ['1 {}'.format(BadPixels.NOISE_OUT_OF_THRESHOLD.name),
'2 {}'.format(BadPixels.OFFSET_NOISE_EVAL_ERROR.name),
'3 {}'.format(BadPixels.OFFSET_OUT_OF_THRESHOLD.name),
'4 {}'.format('MIXED')]
heatmapPlot(bpix_code, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label='', vmax=vmax,
use_axis=ax, cb_ticklabels=cb_labels, cb_ticks = np.arange(4)+1,
title='{}'.format(title))
del bpix_code
else:
heatmapPlot(data, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label=label, vmax=vmax,
use_axis=ax,
title='{}'.format(title))
for qm in res[cap]:
for iconst, const in enumerate(['Offset', 'Noise']):
data = res[cap][qm][const]
dataBP = np.copy(data)
dataBP[res[cap][qm]['BadPixelsDark'] > 0] = -1
x_ranges = [[0, 1500], [0, 40]]
hn, cn = np.histogram(
data[:, :, :, gain], bins=100, range=x_ranges[iconst])
hnBP, cnBP = np.histogram(dataBP[:, :, :, gain], bins=cn)
d = [{'x': cn[:-1],
'y': hn,
'drawstyle': 'steps-pre',
'label': 'All data',
},
{'x': cnBP[:-1],
'y': hnBP,
'drawstyle': 'steps-pre',
'label': 'Bad pixels masked',
},
]
ax = fig.add_subplot(325+iconst)
_ = simplePlot(d, figsize=(5, 7), aspect=1,
x_label="{} value [ADU]".format(const),
y_label="# of occurance",
title='', legend_pad=0.1, legend_size='10%',
use_axis=ax,
y_log=True, legend='outside-top-2col-frame')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots shows the results of a bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixels values in x/y are re-binned with a factor of 2.
This excludes single bad pixels present only in disconnected pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# Switch rebin to 1 for full resolution and
# no interpolation for badpixel values.
rebin = 2
for gain in range(3):
display(Markdown('### Bad pixel behaviour - {} gain ###'.format(gain_names[gain])))
for cap in capacitor_settings:
for mod, data in badpix_g[cap].items():
plot_badpix_3d(data[...,gain], cols, title='', rebin_fac=rebin)
ax = plt.gca()
leg = ax.get_legend()
leg.set(alpha=0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary across tiles ##
Plots give an overview of calibration constants averaged across tiles. A bad pixel mask is applied. Constants are compared with pre-existing constants retrieved from the calibration database. Differences $\Delta$ between the old and new constants are shown.
%% Cell type:code id: tags:
``` python
time_summary = []
for cap, cap_data in old_mdata.items():
time_summary.append(f"The following pre-existing constants are used for comparison for capacitor setting **{cap}**:")
for qm, qm_data in cap_data.items():
time_summary.append(f"- Module {qm}")
for const, const_data in qm_data.items():
time_summary.append(f" - {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
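%% Cell type:markdown id: tags:
The per-tile averaging in the next cell works purely by reshaping: the $256 \times 256$ pixel plane is split into $8 \times 2$ tiles of $32 \times 128$ pixels each, the pixel axes within a tile are averaged away, and the result is flattened to 16 tile columns per memory cell. A minimal sketch of the same trick on toy data:
%% Cell type:code id: tags:
``` python
import numpy as np

cells = 4  # toy number of memory cells; the notebook uses the full 512
toy = np.random.default_rng(1).normal(size=(256, 256, cells))

tiled = toy.reshape(8, 32, 2, 128, cells)              # split into 8 x 2 tiles
per_tile = np.nanmean(tiled, axis=(1, 3))              # (8, 2, cells) tile averages
per_tile = per_tile.swapaxes(0, 2).reshape(cells, 16)  # (cells, 16 tiles)
print(per_tile.shape)
```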
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Summary across tiles - {} gain'.format(gain_names[gain])))
for const in res[cap][qm]:
data = np.copy(res[cap][qm][const][:, :, :, gain])
label = 'Fraction of bad pixels'
if const != 'BadPixelsDark':
data[badpix_g[cap][qm][:, :, :, gain] > 0] = np.nan
label = '{} value [ADU]'.format(const)
else:
data[data>0] = 1.0
data = data.reshape(
int(data.shape[0] / 32),
32,
int(data.shape[1] / 128),
128,
data.shape[2])
data = np.nanmean(data, axis=(1, 3)).swapaxes(
0, 2).reshape(512, 16)
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(121)
_ = heatmapPlot(data[:510, :], add_panels=True,
                                y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(16)+1,
x_ticks=np.arange(16)+0.5)
if old_const[cap][qm][const] is not None:
ax = fig.add_subplot(122)
dataold = np.copy(old_const[cap][qm][const][:, :, :, gain])
label = '$\Delta$ {}'.format(label)
if const != 'BadPixelsDark':
if old_const[cap][qm]['BadPixelsDark'] is not None:
dataold[old_const[cap][qm]['BadPixelsDark'][:, :, :, gain] > 0] = np.nan
else:
dataold[:] = np.nan
else:
dataold[dataold>0]=1.0
dataold = dataold.reshape(
int(dataold.shape[0] / 32),
32,
int(dataold.shape[1] / 128),
128,
dataold.shape[2])
dataold = np.nanmean(dataold, axis=(
1, 3)).swapaxes(0, 2).reshape(512, 16)
dataold = dataold - data
_ = heatmapPlot(dataold[:510, :], add_panels=True,
                                y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(16)+1,
x_ticks=np.arange(16)+0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Variation of offset and noise across Tiles and ASICs ##
The following plots show the standard deviation $\sigma$ of the calibration constants. The plot of the standard deviation across tiles shows the pixels of one tile ($128 \times 32$); the value of each pixel is the standard deviation across the 16 tiles. The standard deviation across ASICs is shown over all tiles: the plot shows the pixels of one ASIC ($16 \times 32$), where each value is the standard deviation across all ASICs of the module.
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Variation of offset and noise across ASICs - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(res[cap][qm][const][:, :, :, gain])
data[badpix_g[cap][qm][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataA = np.nanmean(data, axis=2) # average over cells
dataA = dataA.reshape(8, 32, 16, 16)
dataA = np.nanstd(dataA, axis=(0, 2)) # average across ASICs
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataA, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis'
)
plt.show()
```
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Variation of offset and noise across tiles - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(res[cap][qm][const][:, :, :, gain])
data[badpix_g[cap][qm][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataT = data.reshape(
int(data.shape[0] / 32),
32,
int(data.shape[1] / 128),
128,
data.shape[2])
dataT = np.nanstd(dataT, axis=(0, 2))
dataT = np.nanmean(dataT, axis=2)
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataT, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Aggregate values and per cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behavior, averaged across pixels.
%% Cell type:code id: tags:
``` python
# Loop over capacitor settings, modules, constants
for cap in res:
for qm in res[cap]:
for gain in range(3):
display(Markdown('### Mean over pixels - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(9,11))
for iconst, const in enumerate(res[cap][qm]):
ax = fig.add_subplot(311+iconst)
data = res[cap][qm][const][:,:,:510,gain]
if const == 'BadPixelsDark':
data[data>0] = 1.0
dataBP = np.copy(data)
dataBP[badpix_g[cap][qm][:,:,:510,gain]>0] = -10
data = np.nanmean(data, axis=(0,1))
dataBP = np.nanmean(dataBP, axis=(0,1))
d = [{'y': data,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'All data'
}
]
if const != 'BadPixelsDark':
d.append({'y': dataBP,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'good pixels only'
})
y_title = "{} value [ADU]".format(const)
title = "{} value, {} gain".format(const, gain_names[gain])
else:
y_title = "Fraction of Bad Pixels"
title = "Fraction of Bad Pixels, {} gain".format(gain_names[gain])
                data_min = np.min([data, dataBP]) if const != 'BadPixelsDark' else np.min([data])
data_max = np.max([data[20:], dataBP[20:]])
data_dif = data_max - data_min
local_max = np.max([data[200:300], dataBP[200:300]])
frac = 0.35
new_max = (local_max - data_min*(1-frac))/frac
new_max = np.max([data_max, new_max])
_ = simplePlot(d, figsize=(10,10), aspect=2, xrange=(-12, 510),
x_label = 'Memory Cell ID',
y_label=y_title, use_axis=ax,
title=title,
title_position=[0.5, 1.15],
inset='xy-coord-right', inset_x_range=(0,20), inset_indicated=True,
inset_labeled=True, inset_coord=[0.2,0.5,0.6,0.95],
inset_lw = 1.0, y_range = [data_min-data_dif*0.05, new_max+data_dif*0.05],
y_log=False, legend='outside-top-ncol2-frame', legend_size='18%',
legend_pad=0.00)
plt.tight_layout(pad=1.08, h_pad=0.35)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
table = []
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR]
for cap in res:
for qm in res[cap]:
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(res[cap][qm]['BadPixelsDark'][:,:,:,gain])
l_data.append(len(data[data>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[cap][qm][:,:,:,gain] & bit.value))
if old_const[cap][qm]['BadPixelsDark'] is not None:
old_const[cap][qm]['BadPixelsDark'] = old_const[cap][qm]['BadPixelsDark'].astype(np.uint32)
dataold = np.copy(old_const[cap][qm]['BadPixelsDark'][:, :, :, gain])
l_data_old.append(len(dataold[dataold>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[cap][qm]['BadPixelsDark'][:, :, :, gain] & bit.value))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma}', f'{thresholds_offset_sigma}',
f'{thresholds_offset_hard}/{thresholds_noise_hard}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, gain {gain_names[gain]}', l_threshold[i], l_data[i]]
if old_const[cap][qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels ###
One pixel can be bad for different reasons; therefore, the sum over all types of bad pixels can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
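%% Cell type:markdown id: tags:
A short illustration of why these columns can add up to more than the total: a pixel carrying two flags contributes to both per-type counts, but only once to "All bad pixels".
%% Cell type:code id: tags:
``` python
import numpy as np
from cal_tools.enums import BadPixels

# One pixel flagged for offset and noise, one for noise only, one good pixel.
badpix = np.array(
    [BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value,
     BadPixels.NOISE_OUT_OF_THRESHOLD.value,
     0], dtype=np.uint32)

total = np.count_nonzero(badpix)
per_type = [np.count_nonzero(badpix & b.value)
            for b in (BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.NOISE_OUT_OF_THRESHOLD)]
print(total, per_type, sum(per_type))  # 2 [1, 2] 3
```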
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
for cap in res:
for qm in res[cap]:
data = np.copy(res[cap][qm][const])
data[res[cap][qm]['BadPixelsDark']>0] = np.nan
if old_const[cap][qm][const] is not None and old_const[cap][qm]['BadPixelsDark'] is not None :
dataold = np.copy(old_const[cap][qm][const])
dataold[old_const[cap][qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
for gain in range(3):
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[cap][qm][const] is not None and old_const[cap][qm]['BadPixelsDark'] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input.
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = '' # Not needed, compatibility with current webservice.
cal_db_timeout = 0 # Not needed, compatibility with current webservice.
cal_db_root = '/gpfs/exfel/d/cal/caldb_store'
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = False # Whether to use memory cell order as a detector condition (not stored for older constants)
# Correction parameters
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.
# Output options
overwrite = True # set to True if existing data should be overwritten
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.
create_virtual_cxi_in = '' # Folder to create virtual CXI files in (for each sequence).
# Parallelization options
sequences_per_node = 1 # Sequence files to process per node
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.
num_threads_per_worker = 32 # Number of threads per worker.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
from pathlib import Path
from time import perf_counter
import gc
import re
import warnings
import numpy as np
import h5py
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh
from extra_data.components import LPD1M
from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.lpdlib import get_mem_cell_order
from cal_tools.tools import CalibrationMetadata, calcat_creation_time
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
``` python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$') # This should probably move to cal_tools
run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
output_source = output_source or input_source
cal_db_root = Path(cal_db_root)
metadata = CalibrationMetadata(metadata_folder or out_folder)
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
if not modules or modules == [-1]:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
do_sequence = lambda seq: True
else:
do_sequence = [int(x) for x in sequences].__contains__
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
``` python
data_to_process = []
for inp_path in run_folder.glob('RAW-*.h5'):
match = file_re.match(inp_path.stem)
if match[2] not in karabo_da or not do_sequence(int(match[3])):
continue
outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
run=int(match[1]), aggregator=match[2], seq=int(match[3]))
data_to_process.append((match[2], inp_path, outp_path))
print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[0]}{x[1]}'):
print(f'{data_descr[0]}\t{data_descr[1]}')
```
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
const_yaml = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)
if const_yaml: # Read constants from YAML file.
start = perf_counter()
for da, ccvs in const_yaml.items():
for calibration_name, ccv in ccvs['constants'].items():
if ccv['file-path'] is None:
warnings.warn(f"Missing {calibration_name} for {da}")
continue
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['file-path']),
dataset=ccv['dataset-name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
else: # Retrieve constants from CALCAT.
dark_calibrations = {
1: 'Offset', # np.float32
14: 'BadPixelsDark' # should be np.uint32, but is np.float64
}
base_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
if use_cell_order:
# Read the order of memory cells used
raw_data = xd.DataCollection.from_paths([e[1] for e in data_to_process])
cell_ids_pattern_s = get_mem_cell_order(raw_data, det_inp_sources)
print("Memory cells order:", cell_ids_pattern_s)
dark_condition = base_condition + [
dict(parameter_id=30, value=cell_ids_pattern_s), # Memory cell order
]
else:
dark_condition = base_condition.copy()
illuminated_calibrations = {
20: 'BadPixelsFF', # np.uint32
42: 'GainAmpMap', # np.float32
43: 'FFMap', # np.float32
44: 'RelativeGain' # np.float32
}
illuminated_condition = base_condition + [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da='', event_at=creation_time.isoformat()
)
if not resp['success']:
raise RuntimeError(resp)
for ccv in resp['data']:
cc = ccv['calibration_constant']
da = ccv['physical_detector_unit']['karabo_da']
calibration_name = calibrations[cc['calibration_id']]
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['path_to_file']) / ccv['file_name'],
dataset=ccv['data_set_name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
print('.', end='', flush=True)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
def load_constant_dataset(wid, index, const_descr):
ccv_entry = const_data[const_descr]
with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])
print('.', end='', flush=True)
print('Loading calibration data', end='', flush=True)
start = perf_counter()
const_load_mp.map(load_constant_dataset, list(const_data.keys()))
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
# These are intended in order cell, X, Y, gain
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}
ccv_shape = (mem_cells, 256, 256, 3)
constant_order = {
'Offset': (2, 1, 0, 3),
'BadPixelsDark': (2, 1, 0, 3),
'RelativeGain': (2, 1, 0, 3),
'FFMap': (2, 0, 1, 3),
'BadPixelsFF': (2, 0, 1, 3),
'GainAmpMap': (2, 0, 1, 3),
}
def prepare_constants(wid, index, aggregator):
consts = {calibration_name: entry['data']
for (aggregator_, calibration_name), entry
in const_data.items()
if aggregator == aggregator_}
def _prepare_data(calibration_name, dtype):
return consts[calibration_name] \
.transpose(constant_order[calibration_name]) \
.astype(dtype, copy=True) # Make sure array is contiguous.
if offset_corr and 'Offset' in consts:
ccv_offsets[aggregator] = _prepare_data('Offset', np.float32)
else:
ccv_offsets[aggregator] = np.zeros(ccv_shape, dtype=np.float32)
ccv_gains[aggregator] = np.ones(ccv_shape, dtype=np.float32)
if 'BadPixelsDark' in consts:
ccv_masks[aggregator] = _prepare_data('BadPixelsDark', np.uint32)
else:
ccv_masks[aggregator] = np.zeros(ccv_shape, dtype=np.uint32)
if rel_gain and 'RelativeGain' in consts:
ccv_gains[aggregator] *= _prepare_data('RelativeGain', np.float32)
if ff_map and 'FFMap' in consts:
ccv_gains[aggregator] *= _prepare_data('FFMap', np.float32)
if 'BadPixelsFF' in consts:
np.bitwise_or(ccv_masks[aggregator], _prepare_data('BadPixelsFF', np.uint32),
out=ccv_masks[aggregator])
if gain_amp_map and 'GainAmpMap' in consts:
ccv_gains[aggregator] *= _prepare_data('GainAmpMap', np.float32)
print('.', end='', flush=True)
print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
const_data.clear() # Clear raw constants data now to save memory.
gc.collect();
```
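%% Cell type:markdown id: tags:
The `constant_order` transposes above only rearrange axes: a constant stored as (X, Y, cell, gain) becomes (cell, Y, X, gain) under `(2, 1, 0, 3)`, and the subsequent `astype(..., copy=True)` guarantees a contiguous array for the correction kernel. A quick sketch with a toy shape:
%% Cell type:code id: tags:
``` python
import numpy as np

const = np.zeros((256, 256, 4, 3), dtype=np.float32)  # (X, Y, cells, gains); 4 toy cells
reordered = const.transpose(2, 1, 0, 3).astype(np.float32, copy=True)
print(reordered.shape, reordered.flags['C_CONTIGUOUS'])  # (4, 256, 256, 3) True
```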
%% Cell type:code id: tags:
``` python
def correct_file(wid, index, work):
aggregator, inp_path, outp_path = work
module_index = int(aggregator[-2:])
start = perf_counter()
dc = xd.H5File(inp_path, inc_suspect_trains=False).select('*', 'image.*', require_all=True)
inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
open_time = perf_counter() - start
# Load raw data for this file.
# Reshaping gets rid of the extra 1-len dimensions without
# mangling the frame axis for an actual frame count of 1.
start = perf_counter()
in_raw = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
read_time = perf_counter() - start
# Allocate output arrays.
out_data = np.zeros((in_raw.shape[0], 256, 256), dtype=np.float32)
out_gain = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint8)
out_mask = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint32)
start = perf_counter()
correct_lpd_frames(in_raw, in_cell,
out_data, out_gain, out_mask,
ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
num_threads=num_threads_per_worker)
correct_time = perf_counter() - start
image_counts = inp_source['image.data'].data_counts(labelled=False)
start = perf_counter()
if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)
with DataFile(outp_path, 'w') as outp_file:
outp_file.create_index(dc.train_ids, from_file=dc.files[0])
outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))
outp_source = outp_file.create_instrument_source(outp_source_name)
outp_source.create_index(image=image_counts)
outp_source.create_key('image.cellId', data=in_cell,
chunks=(min(chunks_ids, in_cell.shape[0]),))
outp_source.create_key('image.pulseId', data=in_pulse,
chunks=(min(chunks_ids, in_pulse.shape[0]),))
outp_source.create_key('image.data', data=out_data,
chunks=(min(chunks_data, out_data.shape[0]), 256, 256))
outp_source.create_compressed_key('image.gain', data=out_gain)
outp_source.create_compressed_key('image.mask', data=out_mask)
write_time = perf_counter() - start
total_time = open_time + read_time + correct_time + write_time
frame_rate = in_raw.shape[0] / total_time
print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
in_raw.shape[0], frame_rate))
in_raw = None
in_cell = None
in_pulse = None
out_data = None
out_gain = None
out_mask = None
gc.collect()
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
```
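%% Cell type:markdown id: tags:
`correct_lpd_frames` is a compiled kernel from `cal_tools`. As a rough mental model only, here is a pure-NumPy sketch of the per-frame arithmetic; the 12-bit data/upper-bit gain packing and the multiplicative application of the combined gain map are assumptions, not taken from the kernel itself:
%% Cell type:code id: tags:
``` python
import numpy as np
def correct_lpd_frames_sketch(in_raw, in_cell, offset, gain, mask):
    # Assumed packing: 12 bits of ADC data, gain stage in the bits above.
    stage = (in_raw >> 12).astype(np.uint8)
    data = (in_raw & 0xFFF).astype(np.float32)
    stage = np.minimum(stage, 2)  # the real kernel flags stages > 2 as bad
    c = in_cell[:, None, None]  # broadcast cell IDs over (Y, X)
    y, x = np.ogrid[:in_raw.shape[1], :in_raw.shape[2]]
    # Assumed convention: subtract offset, multiply by the combined gain map.
    out_data = (data - offset[c, y, x, stage]) * gain[c, y, x, stage]
    return out_data, stage, mask[c, y, x, stage]
```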
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
``` python
geom = xg.LPD_1MGeometry.from_quad_positions(
[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])
output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])
det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
``` python
left_edge_ratio = 0.01
right_edge_ratio = 0.99
fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=2000, range=(-1500, 2000))
def find_nearest_index(array, value):
return (np.abs(array - value)).argmin()
cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]
max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
color='red', rotation=90, ha='left', va='center', size='x-large')
ax.set_xlim(vmin-(vmax-vmin)*0.1, vmax+(vmax-vmin)*0.1)
ax.set_ylim(0, max_value*1.1)
pass
```
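%% Cell type:markdown id: tags:
The cumulative-histogram lookup above is effectively a quantile computation; up to the binning and the clipped histogram range, the interval can be cross-checked directly:
%% Cell type:code id: tags:
``` python
vmin_q, vmax_q = np.quantile(np.ravel(data.data), [left_edge_ratio, right_edge_ratio])
print(f'Quantile cross-check: vmin ~ {vmin_q:.1f}, vmax ~ {vmax_q:.1f}')
```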
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
``` python
# Gain codes run from 0 (high gain) to 2 (low gain), so the per-pixel
# maximum over the train is the lowest gain stage reached.
lowest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))
fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(lowest_gain_stage, ax=ax, vmin=0, vmax=2)
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
%% Cell type:markdown id: tags:
### Create virtual CXI file
%% Cell type:code id: tags:
``` python
if create_virtual_cxi_in:
vcxi_folder = Path(create_virtual_cxi_in.format(
run=run, proposal_folder=str(Path(in_folder).parent)))
vcxi_folder.mkdir(parents=True, exist_ok=True)
def sort_files_by_seq(by_seq, outp_path):
by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
return by_seq
from functools import reduce
reduce(sort_files_by_seq, output_paths, output_by_seq := {})
for seq_number, seq_output_paths in output_by_seq.items():
# Create data collection and detector components only for this sequence.
try:
det = LPD1M(xd.DataCollection.from_paths(seq_output_paths), detector_name=karabo_id, min_modules=4)
except ValueError: # Couldn't find enough data for min_modules
continue
det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')
```
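%% Cell type:markdown id: tags:
The `reduce` call above groups corrected files by sequence number, taken from the last five digits of the file stem. An equivalent spelling with `defaultdict`, shown for comparison only:
%% Cell type:code id: tags:
``` python
from collections import defaultdict
output_by_seq_alt = defaultdict(list)
for outp_path in output_paths:
    # e.g. a stem ending in 'S00002' belongs to sequence 2
    output_by_seq_alt[int(outp_path.stem[-5:])].append(outp_path)
```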
......
%% Cell type:markdown id: tags:
# LPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
The following notebook retrieves constants metadata and stores it in a YAML file for use when correcting LPD images.
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Operating conditions
mem_cells = 512  # Memory cells; LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = False # Whether to use memory cell order as a detector condition (not stored for older constants)
```
%% Cell type:code id: tags:
``` python
from pathlib import Path
from time import perf_counter
import numpy as np
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
from cal_tools.lpdlib import get_mem_cell_order
from cal_tools.tools import (
CalibrationMetadata,
calcat_creation_time,
save_constant_metadata,
)
from cal_tools.restful_config import restful_config
```
%% Cell type:code id: tags:
``` python
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
if not modules or modules == [-1]:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
dark_calibrations = {
1: 'Offset',
14: 'BadPixelsDark',
}
base_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
if use_cell_order:
# Read the order of memory cells used
raw_data = xd.RunDirectory(Path(in_folder, f'r{run:04d}'))
cell_ids_pattern_s = get_mem_cell_order(raw_data, det_inp_sources)
print("Memory cells order:", cell_ids_pattern_s)
dark_condition = base_condition + [
dict(parameter_id=30, value=cell_ids_pattern_s), # Memory cell order
]
else:
dark_condition = base_condition.copy()
illuminated_calibrations = {
20: 'BadPixelsFF',
42: 'GainAmpMap',
43: 'FFMap',
44: 'RelativeGain',
}
illuminated_condition = base_condition + [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
```
%% Cell type:code id: tags:
``` python
const_data = {}
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for k_da in karabo_da:
pdu = None
if k_da in retrieved_constants:
print(f"Constant for {k_da} already in {metadata.filename}, won't query again.") # noqa
continue
retrieved_constants[k_da] = dict()
const_mdata = retrieved_constants[k_da]["constants"] = dict()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da=k_da, event_at=creation_time.isoformat())
if not resp["success"]:
print(f"ERROR: Constants {list(calibrations.values())} "
f"were not retrieved, {resp['app_info']}")
for cname in calibrations.values():
const_mdata[cname] = dict()
const_mdata[cname]["file-path"] = None
const_mdata[cname]["dataset-name"] = None
const_mdata[cname]["creation-time"] = None
continue
for ccv in resp["data"]:
cc = ccv['calibration_constant']
cname = calibrations[cc['calibration_id']]
const_mdata[cname] = dict()
const_mdata[cname]["file-path"] = str(Path(ccv['path_to_file']) / ccv['file_name'])
const_mdata[cname]["dataset-name"] = ccv['data_set_name']
const_mdata[cname]["creation-time"] = ccv['begin_at']
pdu = ccv['physical_detector_unit']['physical_name']
print('.', end='', flush=True)
retrieved_constants[k_da]["physical-detector-unit"] = pdu
metadata.save()
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
print(f"Stored retrieved constants in {metadata.filename}")
```
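%% Cell type:markdown id: tags:
For orientation, the data stored under `retrieved-constants` in `calibration_metadata.yml` has roughly this shape (a hypothetical example; the path, dataset name and PDU name below are invented):
%% Cell type:code id: tags:
``` python
retrieved_constants_example = {
    'LPD00': {
        'constants': {
            'Offset': {
                'file-path': '/gpfs/exfel/d/cal/caldb_store/xfel/cal/example/cal.h5',  # invented
                'dataset-name': '/Offset/LPD00/0',  # invented
                'creation-time': '2022-01-01T00:00:00.000+02:00',
            },
            # ... one entry per retrieved calibration (BadPixelsDark, RelativeGain, ...)
        },
        'physical-detector-unit': 'LPD_EXAMPLE_PDU',  # invented
    },
}
```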
......
......@@ -108,8 +108,9 @@ install_requires = [
if "readthedocs.org" not in sys.executable:
install_requires += [
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.3.0", # noqa
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.4.0", # noqa
"XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.7.0", # noqa
"CalParrot @ git+ssh://git@git.xfel.eu:10022/calibration/calparrot.git@0.1", # noqa
]
setup(
......
......@@ -25,7 +25,7 @@ def copy_except_tree(src_group: h5py.Group, dest_group: h5py.Group, except_tree)
if except_tree_part is True: # Totally excluded
pass
elif except_tree_part is None: # Not excluded
        src_group.copy(name, dest_group, name, without_attrs=True)
else: # Partially excluded
src_subgroup = src_group[name]
assert isinstance(src_subgroup, h5py.Group)
......
import copy
from typing import List, Optional, Tuple
from warnings import warn
import h5py
import numpy as np
......@@ -769,3 +770,23 @@ class LpdCorrections:
self.initialize(offsets, rel_gains, rel_gains_b, bpixels, noises,
flat_fields)
def get_mem_cell_order(run, sources) -> str:
"""Load the memory cell order to use as a condition to find constants"""
res = set()
for source in sources:
cell_id_data = run[source, 'image.cellId'].drop_empty_trains()
if len(cell_id_data.train_ids) == 0:
continue # No data for this module
cell_ids = cell_id_data[0].ndarray()
# Trailing comma required so e.g. "...,1" doesn't match "...,10"
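        # (presumably via prefix/substring matching: "0,1" would match
        #  "0,10", while "0,1," does not match "0,10,")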
res.add(",".join([str(c) for c in cell_ids.flatten()]) + ",")
if len(res) > 1:
warn("Memory cell order varies between detector modules: "
"; ".join([f"{s[:10]}...{s[-10:]}" for s in res]))
elif not res:
raise ValueError("Couldn't find memory cell order for any modules")
return res.pop()
from pathlib import Path
from dynaconf import Dynaconf
config_dir = Path(__file__).parent.resolve()
......@@ -17,6 +16,8 @@ restful_config = Dynaconf(
def calibration_client():
from calibration_client import CalibrationClient
# Create client for CalCat.
calcat_config = restful_config.get('calcat')
return CalibrationClient(
......
......@@ -385,11 +385,14 @@ class JobArgs:
"""Run this job in a local process, return exit status"""
return call(self.format_cmd(python), cwd=work_dir)
    def submit_job(
            self, work_dir, python, slurm_opts, after_ok=(), after_any=(), env=None
    ):
"""Submit this job to Slurm, return its job ID"""
cmd = slurm_opts.get_launcher_command(work_dir, after_ok, after_any)
cmd += self.format_cmd(python)
        # sbatch propagates environment variables into the job by default
        output = check_output(cmd, cwd=work_dir, env=env).decode('utf-8')
return output.partition(';')[0].strip() # job ID
......@@ -440,7 +443,7 @@ class JobChain:
'steps': [step.to_dict() for step in self.steps]
}, f, indent=2)
    def submit_jobs(self, slurm_opts: SlurmOptions, env=None):
"""Submit these jobs to Slurm, return a list of job IDs
Slurm dependencies are used to manage the sequence of jobs.
......@@ -453,7 +456,9 @@ class JobChain:
step_job_ids = []
kw = {('after_any' if step.after_error else 'after_ok'): dep_job_ids}
for job_desc in step.jobs:
            jid = job_desc.submit_job(
                self.work_dir, self.python, slurm_opts, env=env, **kw
            )
step_job_ids.append(jid)
dep_job_ids = step_job_ids
all_job_ids.extend(step_job_ids)
......
......@@ -135,12 +135,12 @@ def main(argv=None):
out_folder = parameters['out-folder']
params_to_set = {'metadata_folder': "."}
if args.out_folder:
        out_folder = parameters['out-folder'] = os.path.abspath(args.out_folder)
params_to_set['out_folder'] = out_folder
update_notebooks_params(cal_work_dir, params_to_set)
if args.report_to:
        report_to = os.path.abspath(args.report_to)
else: # Default to saving report in output folder
report_to = str(Path(out_folder, f'xfel-calibrate-repeat-{run_uuid}'))
cal_metadata['report-path'] = f'{report_to}.pdf'
......@@ -159,12 +159,17 @@ def main(argv=None):
job_chain.run_direct()
joblist = []
else:
# The queries to look up constants should all be the same as those
# from the previous calibration - tell CalParrot to warn if not.
env = os.environ.copy()
env['CALPARROT_NEW_QUERY'] = 'warn'
joblist = job_chain.submit_jobs(SlurmOptions(
partition=args.slurm_partition,
mem=args.slurm_mem,
        ), env=env)
    fmt_args = {'cal_work_dir': cal_work_dir,
'out_path': out_folder,
'version': get_pycalib_version(),
'report_to': report_to,
......
......@@ -253,7 +253,7 @@ def test_raise_get_from_db(_agipd_const_cond):
assert str(excinfo.value) == "Resource temporarily unavailable"
# Wrong type for creation_time.
    with pytest.raises(TypeError):
_call_get_from_db(
constant=constant, condition=condition,
karabo_id=AGIPD_KARABO_ID, karabo_da="AGIPD00",
......
......@@ -279,17 +279,16 @@ class JobsMonitor:
log.debug("Update MDC for %s, %s: %s", r['action'], r['mymdc_id'], msg)
if r['action'] == 'CORRECT':
            status = 'A' if success else 'E' # Available/Error
self.mymdc_update_run(r['mymdc_id'], msg, status)
else: # r['action'] == 'DARK'
status = 'F' if success else 'E' # Finished/Error
self.mymdc_update_dark(r['mymdc_id'], msg, status)
    def mymdc_update_run(self, run_id, msg, status='IP'):
"""Update correction status in MyMdC"""
        data = {'cal_pipeline_reply': msg, 'flg_cal_data_status': status}
        if status != 'IP':
data['cal_last_end_at'] = datetime.now(tz=timezone.utc).isoformat()
response = self.mdc.update_run_api(run_id, data)
if response.status_code != 200:
......
......@@ -27,7 +27,8 @@ parser.add_argument('--runs', type=parse_int_range,
parser.add_argument('--rid', type=int, help='Run id from MDC')
parser.add_argument('--msg', type=str, help='Message string to MDC',
default='Error while job submission')
parser.add_argument('--really', action='store_true',
                    help="Actually make changes (otherwise dry-run)")
args = parser.parse_args()
if args.conf_file is not None:
......
......@@ -742,17 +742,20 @@ async def update_mdc_status(mdc: MetadataClient, action: str,
https://git.xfel.eu/gitlab/detectors/pycalibration/wikis/MyMDC-Communication
"""
if message.split(':')[0] in ('FAILED', 'WARN'): # Errors
        flag = 'E'
elif message.split(':')[0] == 'SUCCESS': # Success
        flag = 'IP'
if 'Uploaded' in message or 'Finished' in message:
flag = 'A' if action == 'correct' else 'F'
else: # MDC Timeout
        flag = 'E' if action == 'correct' else 'T'
if action == 'correct':
func = mdc.update_run_api
        data = {'cal_pipeline_reply': message}
# Don't send In Progress; job_monitor will send this when jobs start.
if flag != 'IP':
data['flg_cal_data_status'] = flag
elif action == 'dark_request':
func = mdc.update_dark_run_api
......