Feat/ppu/number of trains

d00ed80a · Thomas Michelat · e6df7b77 · d00ed80a
Commit d00ed80a authored 1 year ago by Thomas Michelat
--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
@@ -141,7 +141,7 @@
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import yaml\n",
-    "from extra_data import RunDirectory, stack_detector_data\n",
+    "from extra_data import by_id, RunDirectory, stack_detector_data\n",
    "from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry\n",
    "from matplotlib import cm as colormap\n",
    "from matplotlib.colors import LogNorm\n",
@@ -304,7 +304,14 @@
    "\n",
    "    # The trains picked are the unique values of trainTrigger.sequenceStart\n",
    "    # minus the first (previous trigger before this run).\n",
-    "    train_ids = np.unique(seq_start)[1:] + ppu_train_offset\n",
+    "    start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset\n",
+    "\n",
+    "    train_ids = []\n",
+    "    for train_id in start_train_ids:\n",
+    "        n_trains = dc[\n",
+    "            use_ppu_device, 'trainTrigger.numberOfTrains'\n",
+    "        ].select_trains(by_id[[train_id]]).ndarray()[0]\n",
+    "        train_ids.extend(list(range(train_id, train_id + n_trains)))\n",
    "\n",
    "    print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')\n",
    "\n",

 %% Cell type:markdown id: tags:

 # AGIPD Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the AGIPD Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
 out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225"  # the folder to output to, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
 overwrite = False  # IGNORED, NEEDED FOR COMPATIBILITY.
 modules = [-1] # modules to correct, set to -1 for all, range allowed
 train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
 run = 225 # runs to process, required

 karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_template = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images
 index_source_template = 'INDEX/{}/DET/{}:xtdf/'  # path in the HDF5 file to images
 ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information
 karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device

 slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook

 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants

 mem_cells = -1  # Number of memory cells used, set to 0 to automatically infer
 bias_voltage = -1  # bias voltage, set to 0 to use stored value in slow data.
 acq_rate = -1. # the detector acquisition rate, use 0 to try to auto-determine
 gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.
 gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
 max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
 mem_cells_db = -1  # set to a value different than 0 to use this value for DB queries
 integration_time = -1 # integration time, negative values for auto-detection.

 # Correction parameters
 blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
 cm_dark_fraction = 0.66 # threshold for fraction of  empty pixels to consider module enough dark to perform CM correction
 cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
 cm_n_itr = 4 # number of iterations for common mode correction
 hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
 mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
 noisy_adc_threshold = 0.25 # threshold to mask complete adc
 ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
 photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection

 # Correction Booleans
 only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
 rel_gain = False # do relative gain correction based on PC data
 xray_gain = False # do relative gain correction based on xray data
 blc_noise = False # if set, baseline correction via noise peak location is attempted
 blc_stripes = False # if set, baseline corrected via stripes
 blc_hmatch = False # if set, base line correction via histogram matching is attempted
 match_asics = False # if set, inner ASIC borders are matched to the same signal level
 adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
 zero_nans = False # set NaN values in corrected data to 0
 zero_orange = False # set to 0 very negative and very large values in corrected data
 blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
 corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
 force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
 force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
 mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
 common_mode = False # Common mode correction
 melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
 mask_zero_std = False # Mask pixels with zero standard deviation across train
 low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
 round_photons = False  # Round to absolute number of photons, only use with gain corrections

 # Optional auxiliary devices
 use_ppu_device = ''  # Device ID for a pulse picker device to only process picked trains, empty string to disable
 ppu_train_offset = 0  # When using the pulse picker, offset between the PPU's sequence start and actually picked train

 use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
 litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
 energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
 use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction

 use_xgm_device = ''  # DoocsXGM device ID to obtain actual photon energy, operating condition else.

 # Output parameters
 recast_image_data = ''  # Cast data to a different dtype before saving
 compress_fields = ['gain', 'mask']  # Datasets in image group to compress.

 # Plotting parameters
 skip_plots = False # exit after writing corrected files and metadata
 cell_id_preview = 1 # cell Id used for preview in single-shot plots

 # Parallelization parameters
 chunk_size = 1000  # Size of chunk for image-wise correction
 n_cores_correct = 16 # Number of chunks to be processed in parallel
 n_cores_files = 4 # Number of files to be processed in parallel
 sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
 max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
 max_tasks_per_worker = 1  # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.

 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
 ```

 %% Cell type:code id: tags:

 ``` python
 import itertools
 import os
 import math
 import multiprocessing
 import re
 import warnings
 from datetime import timedelta
 from logging import warning
 from pathlib import Path
 from time import perf_counter

 import tabulate
 from dateutil import parser
 from IPython.display import Latex, Markdown, display

 warnings.filterwarnings('ignore')
 import matplotlib
 import matplotlib.pyplot as plt
 import yaml
-from extra_data import RunDirectory, stack_detector_data
+from extra_data import by_id, RunDirectory, stack_detector_data
 from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
 from matplotlib import cm as colormap
 from matplotlib.colors import LogNorm

 matplotlib.use("agg")
 %matplotlib inline
 import numpy as np
 import seaborn as sns

 sns.set()
 sns.set_context("paper", font_scale=1.4)
 sns.set_style("ticks")

 from cal_tools import agipdalgs as calgs
 from cal_tools.agipdlib import (
    AgipdCorrections,
    AgipdCtrl,
    CellRange,
    LitFrameSelection,
 )
 from cal_tools.ana_tools import get_range
 from cal_tools.enums import AgipdGainMode, BadPixels
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    CalibrationMetadata,
    calcat_creation_time,
    map_modules_from_folder,
    module_index_to_qm,
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_folder = in_folder / f'r{run:04d}'
 ```

 %% Cell type:markdown id: tags:

 ## Evaluated parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Fill dictionaries comprising bools and arguments for correction and data analysis

 # Here the hierarchy and dependability for correction booleans are defined
 corr_bools = {}

 # offset is at the bottom of AGIPD correction pyramid.
 corr_bools["only_offset"] = only_offset

 # Dont apply any corrections if only_offset is requested
 if not only_offset:
    corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
    corr_bools["rel_gain"] = rel_gain
    corr_bools["xray_corr"] = xray_gain
    corr_bools["blc_noise"] = blc_noise
    corr_bools["blc_stripes"] = blc_stripes
    corr_bools["blc_hmatch"] = blc_hmatch
    corr_bools["blc_set_min"] = blc_set_min
    corr_bools["match_asics"] = match_asics
    corr_bools["corr_asic_diag"] = corr_asic_diag
    corr_bools["zero_nans"] = zero_nans
    corr_bools["zero_orange"] = zero_orange
    corr_bools["mask_noisy_adc"] = mask_noisy_adc
    corr_bools["force_hg_if_below"] = force_hg_if_below
    corr_bools["force_mg_if_below"] = force_mg_if_below
    corr_bools["common_mode"] = common_mode
    corr_bools["melt_snow"] = melt_snow
    corr_bools["mask_zero_std"] = mask_zero_std
    corr_bools["low_medium_gap"] = low_medium_gap
    corr_bools["round_photons"] = round_photons

 # Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
 disable_for_fixed_gain = [
    "adjust_mg_baseline",
    "blc_set_min",
    "force_hg_if_below",
    "force_mg_if_below",
    "low_medium_gap",
    "melt_snow",
    "rel_gain"
 ]
 ```

 %% Cell type:code id: tags:

 ``` python
 if sequences == [-1]:
    sequences = None

 dc = RunDirectory(run_folder)

 ctrl_src = ctrl_source_template.format(karabo_id_control)
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)
 index_src = index_source_template.format(karabo_id, receiver_template)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create output folder
 out_folder.mkdir(parents=True, exist_ok=True)

 # Evaluate detector instance for mapping
 instrument = karabo_id.split("_")[0]
 if instrument == "SPB":
    dinstance = "AGIPD1M1"
    nmods = 16
 elif instrument == "MID":
    dinstance = "AGIPD1M2"
    nmods = 16
 elif instrument == "HED":
    dinstance = "AGIPD500K"
    nmods = 8

 # Evaluate requested modules
 if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(nmods))
    karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
 else:
    modules = [int(x[-2:]) for x in karabo_da]

 print("Process modules:", ', '.join(module_index_to_qm(x) for x in modules))
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 print(f"Detector instance {dinstance}")
 ```

 %% Cell type:code id: tags:

 ``` python
 if use_ppu_device:
    # Obtain trains to process if using a pulse picker device.

    # Will throw an uncaught exception if the device is wrong.
    seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()

    # The trains picked are the unique values of trainTrigger.sequenceStart
    # minus the first (previous trigger before this run).
-    train_ids = np.unique(seq_start)[1:] + ppu_train_offset
+    start_train_ids = np.unique(seq_start)[1:] + ppu_train_offset
+
+    train_ids = []
+    for train_id in start_train_ids:
+        n_trains = dc[
+            use_ppu_device, 'trainTrigger.numberOfTrains'
+        ].select_trains(by_id[[train_id]]).ndarray()[0]
+        train_ids.extend(list(range(train_id, train_id + n_trains)))

    print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')

 elif train_ids != [-1]:
    # Specific trains passed by parameter, convert to ndarray.
    train_ids = np.array(train_ids)

    print(f'Processing up to {len(train_ids)} manually selected train(s)')
 else:
    # Process all trains.
    train_ids = None

    print(f'Processing all valid trains')
 ```

 %% Cell type:code id: tags:

 ``` python
 # set everything up filewise
 mapped_files, _, total_sequences, _, _ =  map_modules_from_folder(
    str(in_folder), run, path_template, karabo_da, sequences
 )
 file_list = []

 # ToDo: Split table over pages
 print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
 table = []
 ti = 0
 for k, files in mapped_files.items():
    i = 0
    for f in list(files.queue):
        file_list.append(f)
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i,  f))
        i += 1
        ti += 1
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["#", "module", "# module", "file"])))
 file_list = sorted(file_list, key=lambda name: name[-10:])
 ```

 %% Cell type:code id: tags:

 ``` python
 first_mod_channel = sorted(modules)[0]

 instrument_src_mod = [
    s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
 mod_channel = int(re.findall(rf".*{first_mod_channel}CH([0-9]+):.*", instrument_src_mod)[0])

 agipd_cond = AgipdCtrl(
    run_dc=dc,
    image_src=instrument_src_mod,
    ctrl_src=ctrl_src,
    raise_error=False,  # to be able to process very old data without gain_setting value
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Run's creation time:
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 offset = parser.parse(creation_date_offset)
 delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
 creation_time += delta
 print(f"Creation time: {creation_time}")

 if acq_rate == -1.:
    acq_rate = agipd_cond.get_acq_rate()
 if mem_cells == -1:
    mem_cells = agipd_cond.get_num_cells()
 # TODO: look for alternative for passing creation_time
 if gain_setting == -1:
    gain_setting = agipd_cond.get_gain_setting(creation_time)
 if bias_voltage == -1:
    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
 if integration_time == -1:
    integration_time = agipd_cond.get_integration_time()
 if gain_mode == -1:
    gain_mode = agipd_cond.get_gain_mode()
 else:
    gain_mode = AgipdGainMode(gain_mode)
 ```

 %% Cell type:code id: tags:

 ``` python
 if mem_cells is None:
    raise ValueError(f"No raw images found for {instrument_src_mod}")

 mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db

 print(f"Maximum memory cells to calibrate: {mem_cells}")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Using {creation_time} as creation time")
 print("Operating conditions are:")
 print(f"• Bias voltage: {bias_voltage}")
 print(f"• Memory cells: {mem_cells_db}")
 print(f"• Acquisition rate: {acq_rate}")
 print(f"• Gain setting: {gain_setting}")
 print(f"• Gain mode: {gain_mode.name}")
 print(f"• Integration time: {integration_time}")
 print(f"• Photon Energy: 9.2")
 ```

 %% Cell type:code id: tags:

 ``` python
 if gain_mode:
    for to_disable in disable_for_fixed_gain:
        if corr_bools.get(to_disable, False):
            warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
            corr_bools[to_disable] = False
 ```

 %% Cell type:code id: tags:

 ``` python
 if use_litframe_finder != 'off':
    from extra_redu import make_litframe_finder, LitFrameFinderError

    if use_litframe_finder not in ['auto', 'offline', 'online']:
        raise ValueError("Unexpected value in 'use_litframe_finder'.")

    inst = karabo_id_control[:3]
    litfrm = make_litframe_finder(inst, dc, litframe_device_id)
    try:
        get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
        r = get_data[use_litframe_finder]()
        cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
        cell_sel.print_report()
    except LitFrameFinderError as err:
        warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
        cell_sel = CellRange(max_pulses, max_cells=mem_cells)
 else:
    # Use range selection
    cell_sel = CellRange(max_pulses, max_cells=mem_cells)

 print(cell_sel.msg())
 ```

 %% Cell type:code id: tags:

 ``` python
 if round_photons and photon_energy <= 0.0:
    if use_xgm_device:
        # Try to obtain photon energy from XGM device.
        wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']

        try:
            from scipy.constants import h, c, e

            # Read wavelength as a single value and convert to hv.
            photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
            print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
        except ValueError:
            warning('XGM source available but photon energy varies greater than 1%, '
                 'photon rounding disabled!')
            round_photons = False
    else:
        warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
        round_photons = False
 elif round_photons:
    print(f'Photon energy for rounding: {photon_energy:.3f} keV')
 ```

 %% Cell type:markdown id: tags:

 ## Data processing ##

 %% Cell type:code id: tags:

 ``` python
 agipd_corr = AgipdCorrections(
    mem_cells,
    cell_sel,
    h5_data_path=instrument_src,
    h5_index_path=index_src,
    corr_bools=corr_bools,
    gain_mode=gain_mode,
    comp_threads=os.cpu_count() // n_cores_files,
    train_ids=train_ids
 )

 agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
 agipd_corr.hg_hard_threshold = hg_hard_threshold
 agipd_corr.mg_hard_threshold = mg_hard_threshold

 agipd_corr.cm_dark_min = cm_dark_range[0]
 agipd_corr.cm_dark_max = cm_dark_range[1]
 agipd_corr.cm_dark_fraction = cm_dark_fraction
 agipd_corr.cm_n_itr = cm_n_itr
 agipd_corr.noisy_adc_threshold = noisy_adc_threshold
 agipd_corr.ff_gain = ff_gain
 agipd_corr.photon_energy = photon_energy

 agipd_corr.compress_fields = compress_fields
 if recast_image_data:
    agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
 ```

 %% Cell type:code id: tags:

 ``` python
 module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
 ```

 %% Cell type:code id: tags:

 ``` python
 # Retrieve calibration constants to RAM
 agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))

 metadata = CalibrationMetadata(metadata_folder or out_folder)
 # NOTE: this notebook will not overwrite calibration metadata file
 const_yaml = metadata.get("retrieved-constants", {})

 def retrieve_constants(mod):
    """
    Retrieve calibration constants and load them to shared memory

    Metadata for constants is taken from yml file or retrieved from the DB
    """
    k_da = module_index_to_karabo_da[mod]
    # check if there is a yaml file in out_folder that has the device constants.
    if k_da in const_yaml:
        when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
        print(f"Found constants for {k_da} in calibration_metadata.yml")
    else:
        try:
            # TODO: replace with proper retrieval (as done in pre-correction)
            when = agipd_corr.initialize_from_db(
                karabo_id=karabo_id,
                karabo_da=k_da,
                cal_db_interface=cal_db_interface,
                creation_time=creation_time,
                memory_cells=mem_cells_db,
                bias_voltage=bias_voltage,
                photon_energy=9.2,
                gain_setting=gain_setting,
                acquisition_rate=acq_rate,
                integration_time=integration_time,
                module_idx=mod,
                only_dark=False,
            )
            print(f"Queried CalCat for {k_da}")
        except Exception as e:
            warning(f"Module: {k_da}, {e}")
            when = None
    return mod, when, k_da


 print(f'Preparing constants (FF: {agipd_corr.corr_bools.get("xray_corr", False)}, PC: {any(agipd_corr.pc_bools)}, '
      f'BLC: {any(agipd_corr.blc_bools)})')
 ts = perf_counter()
 with multiprocessing.Pool(processes=len(modules)) as pool:
    const_out = pool.map(retrieve_constants, modules)
 print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
 ```

 %% Cell type:code id: tags:

 ``` python
 # allocate memory for images and hists
 n_images_max = mem_cells * 256
 data_shape = (n_images_max, 512, 128)
 agipd_corr.allocate_images(data_shape, n_cores_files)
 ```

 %% Cell type:code id: tags:

 ``` python
 def batches(l, batch_size):
    """Group a list into batches of (up to) batch_size elements"""
    start = 0
    while start < len(l):
        yield l[start:start + batch_size]
        start += batch_size
 ```

 %% Cell type:code id: tags:

 ``` python
 def imagewise_chunks(img_counts):
    """Break up the loaded data into chunks of up to chunk_size

    Yields (file data slot, start index, stop index)
    """


    for i_proc, n_img in enumerate(img_counts):
        n_chunks = math.ceil(n_img / chunk_size)
        for i in range(n_chunks):
            yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer = StepTimer()
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 if max_tasks_per_worker == -1:
    max_tasks_per_worker = None
 with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
    step_timer.done_step('Started pool')

    for file_batch in batches(file_list, n_cores_files):
        # TODO: Move some printed output to logging or similar
        print(f"Processing next {len(file_batch)} files")
        step_timer.start()
        img_counts = pool.starmap(
            agipd_corr.read_file,
            zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
        )
        step_timer.done_step(f'Loading data from files')

        if img_counts == 0:
            # Skip any further processing and output if there are no images to
            # correct in this file.
            continue

        if mask_zero_std:
            # Evaluate zero-data-std mask
            pool.starmap(
                agipd_corr.mask_zero_std, itertools.product(
                    range(len(file_batch)),
                    np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
                )
            )
            step_timer.done_step('Mask 0 std')

        # Perform offset image-wise correction
        pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Offset correction")

        if blc_noise or blc_stripes or blc_hmatch:
            # Perform image-wise correction
            pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
            step_timer.done_step("Base-line shift correction")

        if common_mode:
            # In common mode corrected is enabled.
            # Cell selection is only activated after common mode correction.
            # Perform cross-file correction parallel over asics
            image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
            pool.starmap(agipd_corr.cm_correction, itertools.product(
                image_files_idx, range(16)  # 16 ASICs per module
            ))
            step_timer.done_step("Common-mode correction")

            img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
            step_timer.done_step("Applying selected cells after common mode correction")

        # Perform image-wise correction"
        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
        step_timer.done_step("Gain corrections")

        # Save corrected data
        pool.starmap(agipd_corr.write_file, [
            (i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
            for i_proc, file_name in enumerate(file_batch)
        ])
        step_timer.done_step("Save")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Correction of {len(file_list)} files is finished")
 print(f"Total processing time {step_timer.timespan():.01f} s")
 print(f"Timing summary per batch of {n_cores_files} files:")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 # if the yml file contains "retrieved-constants", that means a leading
 # notebook got processed and the reporting would be generated from it.
 fst_print = True
 timestamps = {}

 for i, (modno, when, k_da) in enumerate(const_out):
    qm = module_index_to_qm(modno)

    if k_da not in const_yaml:
        if fst_print:
            print("Constants are retrieved with creation time: ")
            fst_print = False

        module_timestamps = {}

        print(f"{qm}:")
        for key, item in when.items():
            if hasattr(item, 'strftime'):
                item = item.strftime('%y-%m-%d %H:%M')
            when[key] = item
            print('{:.<12s}'.format(key), item)

        # Store few time stamps if exists
        # Add NA to keep array structure
        for key in ['Offset', 'SlopesPC', 'SlopesFF']:
            if when and key in when and when[key]:
                module_timestamps[key] = when[key]
            else:
                module_timestamps[key] = "NA"
        timestamps[qm] = module_timestamps

 seq = sequences[0] if sequences else 0

 if timestamps:
    with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
        yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_3d_plot(data, edges, x_axis, y_axis):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')

    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T

    # Plot the surface.
    ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")


 def do_2d_plot(data, edges, y_axis, x_axis):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),
              np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
                   norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
    """Load single train for all module

    :param data_folder: Path to folder with data
    :param source: Data source to be loaded
    :param include: Inset of file name to be considered
    :param detector_id: The karabo id of the detector to get data for
    :param tid: Train Id to be loaded. First train is considered if None is given
    :param path: Path to find image data inside h5 file
    """
    try:
        run_data = RunDirectory(data_folder, include)
    except FileNotFoundError:
        warning(f'No corrected files for {include}. Skipping plots.')
        import sys
        sys.exit(0)
    if tid is not None:
        tid, data = run_data.select(
            f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
    else:
        # A first full trainId for all available modules is of interest.
        tid, data = next(run_data.select(
            f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))

    stacked_data = stack_detector_data(
        train=data, data=source, fillvalue=fillvalue, modules=modules)

    return tid, stacked_data
 ```

 %% Cell type:code id: tags:

 ``` python
 if dinstance == "AGIPD500K":
    geom = AGIPD_500K2GGeometry.from_origin()
 else:
    geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
        (-525, 625),
        (-550, -10),
        (520, -160),
        (542.5, 475),
    ])
 ```

 %% Cell type:code id: tags:

 ``` python
 include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
 tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)

 _, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
 _, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
 _, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
 _, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
 _, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
 _, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Analogue Gain ###

 %% Cell type:code id: tags:

 ``` python
 hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
                                         raw[:,1,...].flatten().astype(np.float32),
                                         bins=(100, 100),
                                         range=[[4000, 8192], [4000, 8192]])
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
 ```

 %% Cell type:markdown id: tags:

 ### Signal vs. Digitized Gain ###

 The following plot shows plots signal vs. digitized gain

 %% Cell type:code id: tags:

 ``` python
 hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
                                         gains.flatten().astype(np.float32), bins=(100, 3),
                                         range=[[-50, 8192], [0, 3]])
 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Gain statistics in %")
 table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
          f'{gains[gains==1].size/gains.size*100:.03f}',
          f'{gains[gains==2].size/gains.size*100:.03f}']]
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["High", "Medium", "Low"])))
 ```

 %% Cell type:markdown id: tags:

 ### Intensity per Pulse ###

 %% Cell type:code id: tags:

 ``` python
 pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]

 # Modify pulse_range, if only one pulse is selected.
 if pulse_range[0] == pulse_range[1]:
    pulse_range = [0, pulse_range[1]+int(acq_rate)]

 mean_data = np.nanmean(corrected, axis=(2, 3))
 hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                      pulseId.flatten().astype(np.float32),
                                      bins=(100, int(pulse_range[1])),
                                      range=[[-50, 1000], pulse_range])

 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")

 hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
                                      pulseId.flatten().astype(np.float32),
                                      bins=(100,  int(pulse_range[1])),
                                      range=[[-50, 200000], pulse_range])

 do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
 ```

 %% Cell type:markdown id: tags:

 ### Baseline shift ###

 Estimated base-line shift with respect to the total ADU counts of corrected image.

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 h = ax.hist(blshift.flatten(), bins=100, log=True)
 _ = plt.xlabel('Baseline shift [ADU]')
 _ = plt.ylabel('Counts')
 _ = ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(10, 10))
 corrected_ave = np.nansum(corrected, axis=(2, 3))
 plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
 plt.xlim(-1, 1000)
 plt.grid()
 plt.xlabel('Illuminated corrected [MADU] ')
 _ = plt.ylabel('Estimated baseline shift [ADU]')
 ```

 %% Cell type:code id: tags:

 ``` python
 if cell_id_preview not in cellId[:, 0]:
    print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
    cell_id_preview = cellId[:, 0][0]
    cell_idx_preview = 0
    print(f"Previewing the first available cellId: {cell_id_preview}.")
 else:
    cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Raw preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown(f'Mean over images of the RAW data\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
    vmin, vmax = get_range(data, 5)
    ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 else:
    print("Skipping mean RAW preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
 ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Corrected preview ###\n'))
 if cellId.shape[0] != 1:
    display(Markdown('### Mean CORRECTED Preview ###\n'))
    display(Markdown(f'A mean across train: {tid}\n'))
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    data = np.mean(corrected, axis=0)
    vmin, vmax = get_range(data, 7)
    ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
 else:
    print("Skipping mean CORRECTED preview for single memory cell, "
          f"see single shot image for selected cell ID {cell_id_preview}.")
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
 vmin = - 50
 ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
 nbins = np.int((vmax + 50) / 2)
 h = ax.hist(corrected[cell_idx_preview].flatten(),
            bins=nbins, range=(-50, vmax),
            histtype='stepfilled', log=True)
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ax.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 vmin, vmax = get_range(corrected, 10, -100)
 vmax = np.nanmax(corrected)
 if vmax > 50000:
    vmax=50000
 nbins = np.int((vmax + 100) / 5)
 h = ax.hist(corrected.flatten(), bins=nbins,
            range=(-100, vmax), histtype='step', log=True, label = 'All')
 ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='High gain', color='green')
 ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Medium gain', color='red')
 ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
        alpha=0.5, log=True, label='Low gain', color='yellow')
 ax.legend()
 ax.grid()
 plt.xlabel('[ADU]')
 plt.ylabel('Counts')
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown('### Maximum GAIN Preview ###\n'))
 display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
                         cmap="jet", vmin=-1, vmax=3)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 display(Markdown(f'### Single Shot Bad Pixels ### \n'))
 display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
 ```

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
 ```

 %% Cell type:code id: tags:

 ``` python
 if round_photons:
    display(Markdown('### Photonization histograms ###'))

    x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
    x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
    x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)

    fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
    ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
    ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
    ax.set_yscale('log')
    ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
    ax.set_xlim(x_postround[0], x_postround[-1]+1)
    ax.set_xlabel('Photon energy / keV')
    ax.set_ylabel('Intensity')
    ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')

    phx = ax.twiny()
    phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
    phx.set_xticks(x_photons)
    phx.set_xlabel('# Photons')
    pass
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train  ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
 ```

 %% Cell type:markdown id: tags:

 ### Percentage of Bad Pixels across one train. Only Dark Related ###

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20, 10))
 ax = fig.add_subplot(111)
 cm = np.copy(mask)
 cm[cm > BadPixels.NO_DARK_DATA.value] = 0
 ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
                         vmin=0, ax=ax, vmax=1, cmap="jet")
 ```