diff --git a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
index 6b66a1a7a951bb15a65c0faf515592765e9bb80c..962d0d6cb514b0edd8a34f8867ad557595b2e879 100644
--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
@@ -18,7 +18,7 @@
    "outputs": [],
    "source": [
     "in_folder = \"/gpfs/exfel/exp/SPB/202131/p900230/raw\" # the folder to read data from, required\n",
-    "out_folder = \"/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p900230\"  # the folder to output to, required\n",
+    "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/remove/agipd_resolve_conf\"  # the folder to output to, required\n",
     "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n",
     "modules = [-1] # modules to correct, set to -1 for all, range allowed\n",
     "train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed\n",
@@ -26,31 +26,27 @@
     "\n",
     "karabo_id = \"SPB_DET_AGIPD1M-1\" # karabo karabo_id\n",
     "karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators\n",
-    "receiver_id = \"{}CH0\" # inset for receiver devices\n",
+    "receiver_template = \"{}CH0\" # inset for receiver devices\n",
     "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
-    "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n",
-    "h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n",
-    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n",
+    "instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images\n",
+    "index_source_template = 'INDEX/{}/DET/{}:xtdf/'  # path in the HDF5 file to the image index\n",
+    "ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information\n",
     "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device\n",
-    "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n",
     "\n",
     "slopes_ff_from_files = \"\" # Path to locally stored SlopesFF and BadPixelsFF constants\n",
     "\n",
     "use_dir_creation_date = True # use the creation data of the input dir for database queries\n",
     "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n",
-    "cal_db_timeout = 30000 # in milli seconds\n",
+    "cal_db_timeout = 30000 # in milliseconds\n",
     "creation_date_offset = \"00:00:00\" # add an offset to creation date, e.g. to get different constants\n",
     "\n",
     "use_ppu_device = ''  # Device ID for a pulse picker device to only process picked trains, empty string to disable\n",
     "ppu_train_offset = 0  # When using the pulse picker, offset between the PPU's sequence start and actually picked train\n",
     "\n",
-    "use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable\n",
-    "energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled\n",
-    "\n",
-    "max_cells = 0 # number of memory cells used, set to 0 to automatically infer\n",
-    "bias_voltage = 300 # Bias voltage\n",
+    "mem_cells = 0  # Number of memory cells used, set to 0 to automatically infer\n",
+    "bias_voltage = 0  # bias voltage, set to 0 to use stored value in slow data.\n",
     "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n",
-    "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n",
+    "gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.\n",
     "gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)\n",
     "photon_energy = 9.2 # photon energy in keV\n",
     "overwrite = True # set to True if existing data should be overwritten\n",
@@ -80,7 +76,7 @@
     "zero_nans = False # set NaN values in corrected data to 0\n",
     "zero_orange = False # set to 0 very negative and very large values in corrected data\n",
     "blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr\n",
-    "corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted\n",
+    "corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected\n",
     "force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold\n",
     "force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold\n",
     "mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold\n",
@@ -89,17 +85,20 @@
     "mask_zero_std = False # Mask pixels with zero standard deviation across train\n",
     "low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain\n",
     "\n",
+    "use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable\n",
+    "energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled\n",
+    "\n",
     "# Plotting parameters\n",
     "skip_plots = False # exit after writing corrected files and metadata\n",
     "cell_id_preview = 1 # cell Id used for preview in single-shot plots\n",
     "\n",
     "# Paralellization parameters\n",
-    "chunk_size = 1000 # Size of chunk for image-weise correction\n",
-    "chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.\n",
+    "chunk_size = 1000  # Size of chunk for image-wise correction\n",
     "n_cores_correct = 16 # Number of chunks to be processed in parallel\n",
     "n_cores_files = 4 # Number of files to be processed in parallel\n",
-    "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
-    "max_nodes = 8 # Maximum number of Slurm jobs to split correction work into\n",
+    "sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel\n",
+    "max_nodes = 8 # Maximum number of SLURM jobs to split correction work into\n",
+    "\n",
     "\n",
     "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):\n",
     "    from xfel_calibrate.calibrate import balance_sequences as bs\n",
@@ -131,7 +130,7 @@
     "import matplotlib\n",
     "import matplotlib.pyplot as plt\n",
     "import yaml\n",
-    "from extra_data import RunDirectory, stack_detector_data, by_id\n",
+    "from extra_data import H5File, RunDirectory, stack_detector_data, by_id\n",
     "from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry\n",
     "from matplotlib import cm as colormap\n",
     "from matplotlib.colors import LogNorm\n",
@@ -150,13 +149,9 @@
     "from cal_tools import agipdalgs as calgs\n",
     "from cal_tools.agipdlib import (\n",
     "    AgipdCorrections,\n",
+    "    AgipdCtrl,\n",
     "    CellRange,\n",
     "    LitFrameSelection,\n",
-    "    get_acq_rate,\n",
-    "    get_gain_mode,\n",
-    "    get_integration_time,\n",
-    "    get_gain_setting,\n",
-    "    get_num_cells,\n",
     ")\n",
     "from cal_tools.ana_tools import get_range\n",
     "from cal_tools.enums import AgipdGainMode, BadPixels\n",
@@ -174,7 +169,8 @@
    "outputs": [],
    "source": [
     "in_folder = Path(in_folder)\n",
-    "out_folder = Path(out_folder)"
+    "out_folder = Path(out_folder)\n",
+    "run_folder = in_folder / f'r{run:04d}'"
    ]
   },
   {
@@ -240,12 +236,11 @@
     "if sequences == [-1]:\n",
     "    sequences = None\n",
     "\n",
-    "control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n",
-    "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n",
-    "h5path = h5path.format(karabo_id, receiver_id)\n",
-    "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n",
+    "dc = RunDirectory(run_folder)\n",
     "\n",
-    "print(f'Path to control file {control_fn}')"
+    "ctrl_src = ctrl_source_template.format(karabo_id_control)\n",
+    "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n",
+    "index_src = index_source_template.format(karabo_id, receiver_template)"
    ]
   },
   {
@@ -291,7 +286,6 @@
    "source": [
     "if use_ppu_device:\n",
     "    # Obtain trains to process if using a pulse picker device.\n",
-    "    dc = RunDirectory(in_folder / f'r{run:04d}')\n",
     "\n",
     "    # Will throw an uncaught exception if the device is wrong.\n",
     "    seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()\n",
@@ -301,21 +295,6 @@
     "    train_ids = np.unique(seq_start)[1:] + ppu_train_offset\n",
     "\n",
     "    print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')\n",
-    "    \n",
-    "    # Since we got the DataCollection already, narrow down the files we open.\n",
-    "    # This hardcodes the receiver_id and path_template parameters currently, but this\n",
-    "    # will disappear with moving the entire notebook to EXtra-data.\n",
-    "    subdc = dc.select_trains(by_id[train_ids]).select(f'{karabo_id}/DET/*CH0:xtdf')\n",
-    "    subseq = {int(f.filename[-8:-3]) for f in subdc.files}\n",
-    "    \n",
-    "    if sequences is None:\n",
-    "        # All sequences were meant to be processed by this job, so take the entire\n",
-    "        # subset of sequences.\n",
-    "        sequences = sorted(subseq)\n",
-    "    else:\n",
-    "        # If explicit sequences were specified (e.g. due to job balancing by xfel-calibrate)\n",
-    "        # only work on the intersection between that and what the PPU device offers.\n",
-    "        sequences = sorted(set(sequences) & subseq)\n",
     "\n",
     "elif train_ids != [-1]:\n",
     "    # Specific trains passed by parameter, convert to ndarray.\n",
@@ -332,9 +311,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# set everything up filewise\n",
@@ -368,25 +345,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "filename = file_list[0]\n",
-    "channel = int(re.findall(r\".*-AGIPD([0-9]+)-.*\", filename)[0])\n",
-    "\n",
-    "# Evaluate number of memory cells\n",
-    "mem_cells = get_num_cells(filename, karabo_id, channel)\n",
-    "if mem_cells is None:\n",
-    "    raise ValueError(f\"No raw images found in {filename}\")\n",
-    "\n",
-    "mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db\n",
-    "max_cells = mem_cells if max_cells == 0 else max_cells\n",
-    "\n",
-    "fast_paths = (filename, karabo_id, channel)\n",
-    "slow_paths = (control_fn, karabo_id_control)\n",
-    "\n",
-    "# Evaluate aquisition rate\n",
-    "if acq_rate == 0:\n",
-    "    acq_rate = get_acq_rate(fast_paths, slow_paths)\n",
-    "\n",
-    "print(f\"Maximum memory cells to calibrate: {max_cells}\")"
+    "first_mod_channel = sorted(modules)[0]\n",
+    "\n",
+    "instrument_src_mod = [\n",
+    "    s for s in list(dc.all_sources) if f\"{first_mod_channel}CH\" in s][0]\n",
+    "mod_channel = int(re.findall(rf\".*{first_mod_channel}CH([0-9]+):.*\", instrument_src_mod)[0])\n",
+    "\n",
+    "agipd_cond = AgipdCtrl(\n",
+    "    run_dc=dc,\n",
+    "    image_src=instrument_src_mod,\n",
+    "    ctrl_src=ctrl_src,\n",
+    "    raise_error=False,  # to be able to process very old data without gain_setting value\n",
+    ")"
    ]
   },
   {
@@ -395,24 +365,29 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if use_litframe_device:\n",
-    "    # check run for the AgipdLitFrameFinder device\n",
-    "    try: dc\n",
-    "    except NameError: dc = RunDirectory(in_folder / f'r{run:04d}')\n",
+    "# Evaluate creation time\n",
+    "creation_time = None\n",
+    "if use_dir_creation_date:\n",
+    "    creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)\n",
+    "    offset = parser.parse(creation_date_offset)\n",
+    "    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)\n",
+    "    creation_time += delta\n",
     "\n",
-    "    if use_litframe_device + ':output' in dc.instrument_sources:\n",
-    "        # Use selection provided by the AgipdLitFrameFinder (if the device is recorded)\n",
-    "        cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)\n",
-    "        train_ids = cell_sel.train_ids\n",
-    "    else:\n",
-    "        # Use range selection (if the device is not recorded)\n",
-    "        print(f\"WARNING: LitFrameFinder {use_litframe_device} device is not found.\")\n",
-    "        cell_sel = CellRange(max_pulses, max_cells=max_cells)\n",
+    "if acq_rate == 0.:\n",
+    "    acq_rate = agipd_cond.get_acq_rate()\n",
+    "if mem_cells == 0.:\n",
+    "    mem_cells = agipd_cond.get_num_cells()\n",
+    "# TODO: look for alternative for passing creation_time\n",
+    "if gain_setting == -1:\n",
+    "    gain_setting = agipd_cond.get_gain_setting(creation_time)\n",
+    "if bias_voltage == 0.:\n",
+    "    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)\n",
+    "if integration_time == -1:\n",
+    "    integration_time = agipd_cond.get_integration_time()\n",
+    "if gain_mode == -1:\n",
+    "    gain_mode = agipd_cond.get_gain_mode()\n",
     "else:\n",
-    "    # Use range selection\n",
-    "    cell_sel = CellRange(max_pulses, max_cells=max_cells)\n",
-    "\n",
-    "print(cell_sel.msg())"
+    "    gain_mode = AgipdGainMode(gain_mode)"
    ]
   },
   {
@@ -421,37 +396,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Evaluate creation time\n",
-    "creation_time = None\n",
-    "if use_dir_creation_date:\n",
-    "    creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)\n",
-    "    offset = parser.parse(creation_date_offset)\n",
-    "    delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)\n",
-    "    creation_time += delta\n",
+    "if mem_cells is None:\n",
+    "    raise ValueError(f\"No raw images found in {filename}\")\n",
     "\n",
-    "# Evaluate gain setting\n",
-    "if gain_setting == 0.1:\n",
-    "    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n",
-    "        print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n",
-    "        gain_setting = None\n",
-    "    else:\n",
-    "        try:\n",
-    "            gain_setting = get_gain_setting(str(control_fn), h5path_ctrl)\n",
-    "        except Exception as e:\n",
-    "            print(f'ERROR: while reading gain setting from: \\n{control_fn}')\n",
-    "            print(e)\n",
-    "            print(\"Set gain setting to 0\")\n",
-    "            gain_setting = 0\n",
-    "\n",
-    "# Evaluate gain mode (operation mode)\n",
-    "if gain_mode < 0:\n",
-    "    gain_mode = get_gain_mode(control_fn, h5path_ctrl)\n",
-    "else:\n",
-    "    gain_mode = AgipdGainMode(gain_mode)\n",
+    "mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db\n",
     "\n",
-    "# Evaluate integration time\n",
-    "if integration_time < 0:\n",
-    "    integration_time = get_integration_time(control_fn, h5path_ctrl)"
+    "print(f\"Maximum memory cells to calibrate: {mem_cells}\")"
    ]
   },
   {
@@ -484,6 +434,30 @@
     "            corr_bools[to_disable] = False"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if use_litframe_device:\n",
+    "    # check run for the AgipdLitFrameFinder device\n",
+    "\n",
+    "    if use_litframe_device + ':output' in dc.instrument_sources:\n",
+    "        # Use selection provided by the AgipdLitFrameFinder (if the device is recorded)\n",
+    "        cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)\n",
+    "        train_ids = cell_sel.train_ids\n",
+    "    else:\n",
+    "        # Use range selection (if the device is not recorded)\n",
+    "        print(f\"WARNING: LitFrameFinder {use_litframe_device} device is not found.\")\n",
+    "        cell_sel = CellRange(max_pulses, max_cells=mem_cells)\n",
+    "else:\n",
+    "    # Use range selection\n",
+    "    cell_sel = CellRange(max_pulses, max_cells=mem_cells)\n",
+    "\n",
+    "print(cell_sel.msg())"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -498,10 +472,10 @@
    "outputs": [],
    "source": [
     "agipd_corr = AgipdCorrections(\n",
-    "    max_cells,\n",
+    "    mem_cells,\n",
     "    cell_sel,\n",
-    "    h5_data_path=h5path,\n",
-    "    h5_index_path=h5path_idx,\n",
+    "    h5_data_path=instrument_src,\n",
+    "    h5_index_path=index_src,\n",
     "    corr_bools=corr_bools,\n",
     "    gain_mode=gain_mode,\n",
     "    comp_threads=os.cpu_count() // n_cores_files,\n",
@@ -591,7 +565,7 @@
    "outputs": [],
    "source": [
     "# allocate memory for images and hists\n",
-    "n_images_max = max_cells * 256\n",
+    "n_images_max = mem_cells * 256\n",
     "data_shape = (n_images_max, 512, 128)\n",
     "agipd_corr.allocate_images(data_shape, n_cores_files)"
    ]
@@ -621,6 +595,8 @@
     "\n",
     "    Yields (file data slot, start index, stop index)\n",
     "    \"\"\"\n",
+    "    \n",
+    "    \n",
     "    for i_proc, n_img in enumerate(img_counts):\n",
     "        n_chunks = math.ceil(n_img / chunk_size)\n",
     "        for i in range(n_chunks):\n",
@@ -650,8 +626,11 @@
     "    for file_batch in batches(file_list, n_cores_files):\n",
     "        # TODO: Move some printed output to logging or similar\n",
     "        print(f\"Processing next {len(file_batch)} files\")\n",
-    "        img_counts = pool.starmap(agipd_corr.read_file, zip(range(len(file_batch)), file_batch,\n",
-    "                                                                  [not common_mode]*len(file_batch)))\n",
+    "        step_timer.start()\n",
+    "        img_counts = pool.starmap(\n",
+    "            agipd_corr.read_file,\n",
+    "            zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))\n",
+    "        )\n",
     "        step_timer.done_step(f'Loading data from files')\n",
     "\n",
     "        if img_counts == 0:\n",
@@ -661,9 +640,12 @@
     "\n",
     "        if mask_zero_std:\n",
     "            # Evaluate zero-data-std mask\n",
-    "            pool.starmap(agipd_corr.mask_zero_std, itertools.product(\n",
-    "                range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)\n",
-    "            ))\n",
+    "            pool.starmap(\n",
+    "                agipd_corr.mask_zero_std, itertools.product(\n",
+    "                    range(len(file_batch)),\n",
+    "                    np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)\n",
+    "                )\n",
+    "            )\n",
     "            step_timer.done_step('Mask 0 std')\n",
     "\n",
     "        # Perform offset image-wise correction\n",
@@ -676,6 +658,8 @@
     "            step_timer.done_step(\"Base-line shift correction\")\n",
     "\n",
     "        if common_mode:\n",
+    "            # Common mode correction is enabled.\n",
+    "            # Cell selection is only activated after common mode correction.\n",
     "            # Perform cross-file correction parallel over asics\n",
     "            pool.starmap(agipd_corr.cm_correction, itertools.product(\n",
     "                range(len(file_batch)), range(16)  # 16 ASICs per module\n",
@@ -683,11 +667,12 @@
     "            step_timer.done_step(\"Common-mode correction\")\n",
     "\n",
     "            img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))\n",
-    "            step_timer.done_step(\"Applying selected pulses after common mode correction\")\n",
+    "            step_timer.done_step(\"Applying selected cells after common mode correction\")\n",
     "\n",
     "        # Perform image-wise correction\n",
-    "        pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))\n",
-    "        step_timer.done_step(\"Gain corrections\")\n",
+    "        if any(agipd_corr.pc_bools):\n",
+    "            pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))\n",
+    "            step_timer.done_step(\"Gain corrections\")\n",
     "\n",
     "        # Save corrected data\n",
     "        pool.starmap(agipd_corr.write_file, [\n",
@@ -815,17 +800,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def get_trains_data(run_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):\n",
+    "def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):\n",
     "    \"\"\"Load single train for all module\n",
     "\n",
-    "    :param run_folder: Path to folder with data\n",
+    "    :param data_folder: Path to folder with data\n",
     "    :param source: Data source to be loaded\n",
     "    :param include: Inset of file name to be considered\n",
     "    :param detector_id: The karabo id of the detector to get data for\n",
     "    :param tid: Train Id to be loaded. First train is considered if None is given\n",
     "    :param path: Path to find image data inside h5 file\n",
     "    \"\"\"\n",
-    "    run_data = RunDirectory(run_folder, include)\n",
+    "    run_data = RunDirectory(data_folder, include)\n",
     "    if tid is not None:\n",
     "        tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)\n",
     "    else:\n",
@@ -865,7 +850,7 @@
     "_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)\n",
     "_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)\n",
     "_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)\n",
-    "_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, karabo_id, tid, modules=nmods)"
+    "_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)"
    ]
   },
   {
@@ -1255,5 +1240,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb b/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb
index dbcfe6158dd2c725c11fac2402d2acde19aab711..2cd11eb0b59cbb97f5c72c5e2ac9670e5573b95e 100644
--- a/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb
+++ b/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb
@@ -26,9 +26,10 @@
     "karabo_id = \"SPB_DET_AGIPD1M-1\" # karabo karabo_id\n",
     "karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators\n",
     "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
-    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information\n",
+    "ctrl_source_template = '{}/MDL/FPGA_COMP_TEST'  # path to control information\n",
+    "instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images\n",
+    "receiver_template = \"{}CH0\" # inset for receiver devices\n",
     "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device\n",
-    "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n",
     "\n",
     "use_dir_creation_date = True # use the creation data of the input dir for database queries\n",
     "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n",
@@ -38,13 +39,11 @@
     "calfile =  \"\" # path to calibration file. Leave empty if all data should come from DB\n",
     "nodb = False # if set only file-based constants will be used\n",
     "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n",
-    "bias_voltage = 300\n",
+    "bias_voltage = 0  # bias voltage, set to 0 to use stored value in slow data.\n",
     "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n",
-    "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n",
+    "gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.\n",
     "gain_mode = -1  # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)\n",
     "photon_energy = 9.2 # photon energy in keV\n",
-    "max_cells_db_dark = 0  # set to a value different than 0 to use this value for dark data DB queries\n",
-    "max_cells_db = 0 # set to a value different than 0 to use this value for DB queries\n",
     "integration_time = -1 # integration time, negative values for auto-detection.\n",
     "\n",
     "# Correction Booleans\n",
@@ -86,20 +85,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from pathlib import Path\n",
     "from typing import List, Tuple\n",
     "\n",
     "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "import multiprocessing\n",
     "import numpy as np\n",
+    "from datetime import timedelta\n",
+    "from dateutil import parser\n",
+    "from extra_data import RunDirectory\n",
     "\n",
     "matplotlib.use(\"agg\")\n",
-    "import multiprocessing\n",
-    "from datetime import timedelta\n",
-    "from pathlib import Path\n",
     "\n",
-    "import matplotlib.pyplot as plt\n",
     "from cal_tools import agipdlib, tools\n",
     "from cal_tools.enums import AgipdGainMode\n",
-    "from dateutil import parser\n",
     "from iCalibrationDB import Conditions, Constants, Detectors"
    ]
   },
@@ -121,8 +121,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "max_cells = mem_cells\n",
-    "\n",
     "creation_time = None\n",
     "if use_dir_creation_date:\n",
     "    creation_time = tools.get_dir_creation_date(str(in_folder), run)\n",
@@ -146,53 +144,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n",
-    "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n",
-    "slow_paths = (control_fn, karabo_id_control)\n",
-    "if gain_setting == 0.1:\n",
-    "    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n",
-    "        print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n",
-    "        gain_setting = None\n",
-    "    else:\n",
-    "        try:\n",
-    "            gain_setting = agipdlib.get_gain_setting(str(control_fn), h5path_ctrl)\n",
-    "        except Exception as e:\n",
-    "            print(f'ERROR: while reading gain setting from: \\n{control_fn}')\n",
-    "            print(e)\n",
-    "            print(\"Set gain setting to 0\")\n",
-    "            gain_setting = 0\n",
-    "\n",
-    "# Evaluate gain mode (operation mode)\n",
-    "if gain_mode < 0:\n",
-    "    gain_mode = agipdlib.get_gain_mode(control_fn, h5path_ctrl)\n",
-    "else:\n",
-    "    gain_mode = AgipdGainMode(gain_mode)\n",
-    "\n",
-    "# Evaluate integration time\n",
-    "if integration_time < 0:\n",
-    "    integration_time = agipdlib.get_integration_time(control_fn, h5path_ctrl)\n",
-    "            \n",
-    "print(f\"Gain setting: {gain_setting}\")\n",
-    "print(f\"Gain mode: {gain_mode.name}\")\n",
-    "print(f\"Detector in use is {karabo_id}\")\n",
+    "ctrl_src = ctrl_source_template.format(karabo_id_control)\n",
     "\n",
+    "print(f\"Detector in use is {karabo_id}\")\n",
     "\n",
     "# Extracting Instrument string\n",
     "instrument = karabo_id.split(\"_\")[0]\n",
     "# Evaluate detector instance for mapping\n",
     "if instrument == \"SPB\":\n",
-    "    dinstance = \"AGIPD1M1\"\n",
     "    nmods = 16\n",
     "elif instrument == \"MID\":\n",
-    "    dinstance = \"AGIPD1M2\"\n",
     "    nmods = 16\n",
     "elif instrument == \"HED\":\n",
-    "    dinstance = \"AGIPD500K\"\n",
     "    nmods = 8\n",
     "\n",
     "print(f\"Instrument {instrument}\")\n",
-    "print(f\"Detector instance {dinstance}\")\n",
-    "\n",
     "\n",
     "if karabo_da[0] == '-1':\n",
     "    if modules[0] == -1:\n",
@@ -202,6 +168,48 @@
     "    modules = [int(x[-2:]) for x in karabo_da]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run_dc = RunDirectory(in_folder / f\"r{run:04d}\")\n",
+    "\n",
+    "# set everything up filewise\n",
+    "mapped_files, _, _, _, _ = tools.map_modules_from_folder(\n",
+    "    str(in_folder), run, path_template, karabo_da, sequences=[0]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read AGIPD conditions from the 1st sequence of 1st module and slow data.\n",
+    "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n",
+    "instrument_src_mod = instrument_src.format(0)\n",
+    "\n",
+    "agipd_cond = agipdlib.AgipdCtrl(\n",
+    "    run_dc=run_dc,\n",
+    "    image_src=None,  # Not needed, as we won't read mem_cells or acq_rate.\n",
+    "    ctrl_src=ctrl_src,\n",
+    ")\n",
+    "\n",
+    "if gain_setting == -1:\n",
+    "    gain_setting = agipd_cond.get_gain_setting(creation_time)\n",
+    "if bias_voltage == 0.:\n",
+    "    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)\n",
+    "if integration_time == -1:\n",
+    "    integration_time = agipd_cond.get_integration_time()\n",
+    "if gain_mode == -1:\n",
+    "    gain_mode = agipd_cond.get_gain_mode()\n",
+    "else:\n",
+    "    gain_mode = AgipdGainMode(gain_mode)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -216,34 +224,33 @@
    "outputs": [],
    "source": [
     "def retrieve_constants(\n",
-    "    qm_files: List[Path], qm: str, karabo_da: str, idx: int\n",
+    "    karabo_da: str, idx: int\n",
     ") -> Tuple[str, str, float, float, str, dict]:\n",
     "    \"\"\"\n",
     "    Retrieve constants for a module.\n",
     "\n",
     "    :return:\n",
-    "            qm: module virtual name i.e. Q1M1.\n",
     "            karabo_da: karabo data aggregator.\n",
     "            acq_rate: acquisition rate parameter.\n",
-    "            max_cells: number of memory cells.\n",
+    "            mem_cells: number of memory cells.\n",
     "            mdata_dict: (DICT) dictionary with the metadata for the retrieved constants.\n",
     "    \"\"\"\n",
-    "    if max_cells != 0:\n",
-    "        # either use overriding notebook parameter\n",
-    "        local_max_cells = max_cells\n",
+    "    if mem_cells == 0:\n",
+    "        # either look around in sequence files\n",
+    "        agipd_cond.image_src = instrument_src.format(idx)\n",
+    "        local_mem_cells = agipd_cond.get_num_cells()\n",
     "    else:\n",
-    "        # or look around in sequence files\n",
-    "        for f in qm_files:\n",
-    "            local_max_cells = agipdlib.get_num_cells(f, karabo_id, idx)\n",
-    "            if local_max_cells is not None:\n",
-    "                break\n",
+    "        # or use overriding notebook parameter\n",
+    "        local_mem_cells = mem_cells\n",
+    "\n",
     "    # maybe we never found this in a sequence file...\n",
-    "    if local_max_cells is None:\n",
-    "        raise ValueError(f\"No raw images found for {qm} for all sequences\")\n",
+    "    if local_mem_cells is None:\n",
+    "        raise ValueError(\n",
+    "            \"No raw images found for \"\n",
+    "            f\"{tools.module_index_to_qm(module_index)}({karabo_da}) for all sequences\")\n",
     "\n",
-    "    if acq_rate == 0:\n",
-    "        local_acq_rate = agipdlib.get_acq_rate(\n",
-    "            fast_paths=(f, karabo_id, idx), slow_paths=slow_paths)\n",
+    "    if acq_rate == 0.:\n",
+    "        local_acq_rate = agipd_cond.get_acq_rate()\n",
     "    else:\n",
     "        local_acq_rate = acq_rate\n",
     "\n",
@@ -254,7 +261,7 @@
     "    const_dict = agipdlib.assemble_constant_dict(\n",
     "        corr_bools,\n",
     "        pc_bools,\n",
-    "        local_max_cells,\n",
+    "        local_mem_cells,\n",
     "        bias_voltage,\n",
     "        gain_setting,\n",
     "        local_acq_rate,\n",
@@ -271,7 +278,7 @@
     "    mdata_dict[\"constants\"] = dict()\n",
     "    mdata_dict[\"physical-detector-unit\"] = None  # initialization\n",
     "\n",
-    "    for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items():\n",
+    "    for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items():  # noqa\n",
     "        if gain_mode and const_name in (\"ThresholdsDark\",):\n",
     "            continue\n",
     "        \n",
@@ -280,11 +287,14 @@
     "        mdata_dict[\"constants\"][const_name] = const_mdata\n",
     "\n",
     "        if slopes_ff_from_files and const_name in [\"SlopesFF\", \"BadPixelsFF\"]:\n",
-    "            const_mdata[\"file-path\"] = f\"{slopes_ff_from_files}/slopesff_bpmask_module_{qm}.h5\"\n",
+    "            const_mdata[\"file-path\"] = (\n",
+    "                f\"{slopes_ff_from_files}/slopesff_bpmask_module_{tools.module_index_to_qm(module_index)}.h5\")  # noqa\n",
     "            const_mdata[\"creation-time\"] = \"00:00:00\"\n",
     "            continue\n",
     "        \n",
-    "        if gain_mode and const_name in (\"BadPixelsPC\", \"SlopesPC\", \"BadPixelsFF\", \"SlopesFF\"):\n",
+    "        if gain_mode and const_name in (\n",
+    "            \"BadPixelsPC\", \"SlopesPC\", \"BadPixelsFF\", \"SlopesFF\"\n",
+    "        ):\n",
     "            param_copy = cond_param.copy()\n",
     "            del param_copy[\"gain_mode\"]\n",
     "            condition = getattr(Conditions, cond_type).AGIPD(**param_copy)\n",
@@ -314,7 +324,7 @@
     "            const_mdata[\"file-path\"] = const_dict[const_name][:2]\n",
     "            const_mdata[\"creation-time\"] = None\n",
     "\n",
-    "    return qm, mdata_dict, karabo_da, local_acq_rate, local_max_cells"
+    "    return mdata_dict, karabo_da, local_acq_rate, local_mem_cells"
    ]
   },
   {
@@ -333,18 +343,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# set everything up filewise\n",
-    "mapped_files, _, _, _, _ = tools.map_modules_from_folder(\n",
-    "    str(in_folder), run, path_template, karabo_da, sequences\n",
-    ")\n",
-    "\n",
     "pc_bools = [corr_bools.get(\"rel_gain\"),\n",
     "            corr_bools.get(\"adjust_mg_baseline\"),\n",
     "            corr_bools.get('blc_noise'),\n",
     "            corr_bools.get('blc_hmatch'),\n",
     "            corr_bools.get('blc_stripes'),\n",
     "            melt_snow]\n",
-    "\n",
     "inp = []\n",
     "only_dark = False\n",
     "nodb_with_dark = False\n",
@@ -355,20 +359,13 @@
     "\n",
     "da_to_qm = dict()\n",
     "for module_index, k_da in zip(modules, karabo_da):\n",
-    "    qm = tools.module_index_to_qm(module_index)\n",
-    "    da_to_qm[k_da] = qm\n",
-    "    \n",
+    "    da_to_qm[k_da] = tools.module_index_to_qm(module_index)\n",
     "    if k_da in retrieved_constants:\n",
-    "        print(f\"Constant for {k_da} already in calibration_metadata.yml, won't query again.\")\n",
-    "        continue\n",
-    "    \n",
-    "    if qm in mapped_files and not mapped_files[qm].empty():\n",
-    "        # TODO: make map_modules_from_folder just return list(s)\n",
-    "        qm_files = [Path(mapped_files[qm].get()) for _ in range(mapped_files[qm].qsize())]\n",
-    "    else:\n",
+    "        print(\n",
+    "            f\"Constant for {k_da} already in calibration_metadata.yml, won't query again.\")\n",
     "        continue\n",
     "\n",
-    "    inp.append((qm_files, qm, k_da, module_index))"
+    "    inp.append((k_da, module_index))"
    ]
   },
   {
@@ -387,8 +384,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for qm, md_dict, karabo_da, acq_rate, max_cells in results:\n",
+    "acq_rate_mods = []\n",
+    "mem_cells_mods = []\n",
+    "for md_dict, karabo_da, acq_rate, mem_cells in results:\n",
     "    retrieved_constants[karabo_da] = md_dict\n",
+    "    mem_cells_mods.append(mem_cells)\n",
+    "    acq_rate_mods.append(acq_rate)\n",
+    "\n",
+    "# Validate that mem_cells and acq_rate are the same for all modules.\n",
+    "# TODO: Should a warning be enough?\n",
+    "if len(set(mem_cells_mods)) != 1 or len(set(acq_rate_mods)) != 1:\n",
+    "    print(\n",
+    "        \"WARNING: Number of memory cells or \"\n",
+    "        \"acquisition rate are not identical for all modules.\\n\"\n",
+    "        f\"mem_cells: {mem_cells_mods}.\\nacq_rate: {acq_rate_mods}.\")\n",
     "\n",
     "# check if it is requested not to retrieve any constants from the database\n",
     "if nodb_with_dark:\n",
@@ -399,7 +408,7 @@
     "          ', '.join([tools.module_index_to_qm(x) for x in modules]))\n",
     "    print(f\"Operating conditions are:\")\n",
     "    print(f\"• Bias voltage: {bias_voltage}\")\n",
-    "    print(f\"• Memory cells: {max_cells}\")\n",
+    "    print(f\"• Memory cells: {mem_cells}\")\n",
     "    print(f\"• Acquisition rate: {acq_rate}\")\n",
     "    print(f\"• Gain mode: {gain_mode.name}\")\n",
     "    print(f\"• Gain setting: {gain_setting}\")\n",
@@ -460,5 +469,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
index 8043f4a2199709fc832cf03a1ea898da292bc501..be96463e0e8c90733e0e8532d1df0a732a714cc1 100644
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
@@ -6,7 +6,7 @@
    "source": [
     "# AGIPD Characterize Dark Images #\n",
     "\n",
-    "Author: S. Hauf, Version: 0.1\n",
+    "Author: European XFEL Detector Group, Version: 2.0\n",
     "\n",
     "The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets , noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. Data for the detector's three gain stages needs to be present, separated into separate runs.\n",
     "\n",
@@ -21,7 +21,6 @@
    "source": [
     "in_folder = \"/gpfs/exfel/d/raw/CALLAB/202031/p900113\" # path to input data, required\n",
     "out_folder = \"\" # path to output to, required\n",
-    "sequences = [-1] # sequence files to evaluate.\n",
     "modules = [-1]  # list of modules to evaluate, RANGE ALLOWED\n",
     "run_high = 9985 # run number in which high gain data was recorded, required\n",
     "run_med = 9984 # run number in which medium gain data was recorded, required\n",
@@ -30,13 +29,10 @@
     "\n",
     "karabo_id = \"HED_DET_AGIPD500K2G\" # karabo karabo_id\n",
     "karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators\n",
-    "receiver_id = \"{}CH0\" # inset for receiver devices\n",
-    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
-    "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
-    "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
-    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n",
+    "receiver_template = \"{}CH0\" # inset for receiver devices\n",
+    "instrument_source_template = '{}/DET/{}:xtdf'  # path in the HDF5 file to images\n",
+    "ctrl_source_template = '{}/MDL/FPGA_COMP'  # path to control information\n",
     "karabo_id_control = \"HED_EXP_AGIPD500K2G\" # karabo-id for control device '\n",
-    "karabo_da_control = \"AGIPD500K2G00\" # karabo DA for control infromation\n",
     "\n",
     "use_dir_creation_date = True  # use dir creation date as data production reference date\n",
     "cal_db_interface = \"tcp://max-exfl016:8020\" # the database interface to use\n",
@@ -45,12 +41,12 @@
     "db_output = False # output constants to database\n",
     "\n",
     "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n",
-    "bias_voltage = 0 # detector bias voltage\n",
-    "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n",
+    "bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.\n",
+    "gain_setting = -1  # the gain setting, use -1 to use value stored in slow data.\n",
+    "gain_mode = -1  # gain mode, use -1 to use value stored in slow data.\n",
     "integration_time = -1 # integration time, negative values for auto-detection.\n",
     "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n",
     "interlaced = False # assume interlaced data format, for data prior to Dec. 2017\n",
-    "rawversion = 2 # RAW file format version\n",
     "\n",
     "thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels\n",
     "thresholds_offset_hard = [0, 0]  # For setting the same threshold offset for the 3 gains. Left for backcompatability. Default [0, 0] to take the following parameters.\n",
@@ -67,8 +63,9 @@
     "thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels\n",
     "thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels\n",
     "\n",
-    "thresholds_gain_sigma = 5. # Gain separation sigma threshold\n",
-    "\n",
+    "thresholds_gain_sigma = 5.  # Gain separation sigma threshold\n",
+    "max_trains = 0  # Maximum number of trains to use for processing dark. Set to 0 to process all available trains.\n",
+    "min_trains = 1  # Minimum number of trains for processing dark. If the raw folder has fewer trains than this minimum, processing is stopped.\n",
     "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells\n",
     "\n",
     "# This is used if modules is not specified:\n",
@@ -96,28 +93,24 @@
     "import os\n",
     "from collections import OrderedDict\n",
     "from datetime import timedelta\n",
-    "from typing import Tuple\n",
+    "from pathlib import Path\n",
+    "from typing import List, Tuple\n",
     "\n",
     "import dateutil.parser\n",
-    "import h5py\n",
     "import matplotlib\n",
     "import numpy as np\n",
     "import pasha as psh\n",
+    "import psutil\n",
     "import tabulate\n",
     "import yaml\n",
+    "from IPython.display import Latex, Markdown, display\n",
+    "from extra_data import RunDirectory\n",
     "\n",
     "matplotlib.use('agg')\n",
     "\n",
     "import iCalibrationDB\n",
     "import matplotlib.pyplot as plt\n",
-    "from cal_tools.agipdlib import (\n",
-    "    get_acq_rate,\n",
-    "    get_bias_voltage,\n",
-    "    get_gain_mode,\n",
-    "    get_gain_setting,\n",
-    "    get_integration_time,\n",
-    "    get_num_cells,\n",
-    ")\n",
+    "from cal_tools.agipdlib import AgipdCtrl\n",
     "from cal_tools.enums import AgipdGainMode, BadPixels\n",
     "from cal_tools.plotting import (\n",
     "    create_constant_overview,\n",
@@ -137,7 +130,6 @@
     "    save_const_to_h5,\n",
     "    send_to_db,\n",
     ")\n",
-    "from IPython.display import Latex, Markdown, display\n",
     "\n",
     "%matplotlib inline"
    ]
@@ -149,14 +141,19 @@
    "outputs": [],
    "source": [
     "# insert control device if format string (does nothing otherwise)\n",
-    "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n",
-    "\n",
-    "max_cells = mem_cells\n",
-    "\n",
-    "offset_runs = OrderedDict()\n",
-    "offset_runs[\"high\"] = run_high\n",
-    "offset_runs[\"med\"] = run_med\n",
-    "offset_runs[\"low\"] = run_low\n",
+    "ctrl_src = ctrl_source_template.format(karabo_id_control)\n",
+    "\n",
+    "runs_dict = OrderedDict()\n",
+    "\n",
+    "for gain_idx, (run_name, run_number) in enumerate(zip(\n",
+    "    [\"high\", \"med\", \"low\"],\n",
+    "    [run_high, run_med, run_low]\n",
+    ")):\n",
+    "    runs_dict[run_name] = {\n",
+    "        \"number\": run_number,\n",
+    "        \"gain\": gain_idx,\n",
+    "        \"dc\": RunDirectory(f'{in_folder}/r{run_number:04d}/')\n",
+    "    }\n",
     "\n",
     "creation_time=None\n",
     "if use_dir_creation_date:\n",
@@ -166,6 +163,10 @@
     "\n",
     "run, prop, seq = run_prop_seq_from_path(in_folder)\n",
     "\n",
+    "# Read report path and create file location string to add with the injection\n",
+    "file_loc = f\"proposal:{prop} runs:{run_low} {run_med} {run_high}\"\n",
+    "\n",
+    "report = get_report(out_folder)\n",
     "cal_db_interface = get_random_db_interface(cal_db_interface)\n",
     "print(f'Calibration database interface: {cal_db_interface}')\n",
     "\n",
@@ -181,31 +182,18 @@
     "    dinstance = \"AGIPD500K\"\n",
     "    nmods = 8\n",
     "\n",
-    "if sequences == [-1]:\n",
-    "    sequences = None\n",
-    "control_names = [f'{in_folder}/r{r:04d}/RAW-R{r:04d}-{karabo_da_control}-S00000.h5'\n",
-    "                 for r in (run_high, run_med, run_low)]\n",
+    "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n",
+    "run_numbers = [run_high, run_med, run_low]\n",
     "\n",
-    "if operation_mode not in (\"ADAPTIVE_GAIN\", \"FIXED_GAIN\"):\n",
-    "    print(f\"WARNING: unknown operation_mode \\\"{operation_mode}\\\" parameter set\")\n",
-    "run_gain_modes = [get_gain_mode(fn, h5path_ctrl) for fn in control_names]\n",
-    "if all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in run_gain_modes):\n",
-    "    fixed_gain_mode = False\n",
-    "    if operation_mode == \"FIXED_GAIN\":\n",
-    "        print(\"WARNING: operation_mode parameter is FIXED_GAIN, slow data indicates adaptive gain\")\n",
-    "elif run_gain_modes == [AgipdGainMode.FIXED_HIGH_GAIN, AgipdGainMode.FIXED_MEDIUM_GAIN, AgipdGainMode.FIXED_LOW_GAIN]:\n",
-    "    if operation_mode == \"ADAPTIVE_GAIN\":\n",
-    "        print(\"WARNING: operation_mode parameter ix ADAPTIVE_GAIN, slow data indicates fixed gain\")\n",
-    "    fixed_gain_mode = True\n",
-    "else:\n",
-    "    print(f'Something is clearly wrong; slow data indicates gain modes {run_gain_modes}')\n",
+    "def create_karabo_da_list(modules):\n",
+    "    return([\"AGIPD{:02d}\".format(i) for i in modules])\n",
     "\n",
-    "if integration_time < 0:\n",
-    "    integration_times = [get_integration_time(fn, h5path_ctrl) for fn in control_names]\n",
-    "    if len(set(integration_times)) > 1:\n",
-    "        print(f'WARNING: integration time is not constant across the specified dark runs')\n",
-    "\n",
-    "integration_time = integration_times[0]\n",
+    "if karabo_da[0] == '-1':\n",
+    "    if modules[0] == -1:\n",
+    "        modules = list(range(nmods))\n",
+    "    karabo_da = create_karabo_da_list(modules)\n",
+    "else:\n",
+    "    modules = [int(x[-2:]) for x in karabo_da]\n",
     "\n",
     "print(f\"Detector in use is {karabo_id}\")\n",
     "print(f\"Instrument {instrument}\")\n",
@@ -218,30 +206,85 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "runs = [run_high, run_med, run_low]\n",
-    "\n",
-    "if gain_setting == 0.1:\n",
-    "    if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):\n",
-    "        print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n",
-    "        gain_setting = None\n",
+    "# Create out_folder if it doesn't exist.\n",
+    "Path(out_folder).mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "n_files = 0\n",
+    "total_file_sizes = 0\n",
+    "max_trains_list = []\n",
+    "\n",
+    "for run_dict in runs_dict.values():\n",
+    "    missing_modules = []\n",
+    "    image_dc = run_dict[\"dc\"].select(f\"{karabo_id_control}*\", \"*\", require_all=True)\n",
+    "    # This is important when no slurm parallelization over modules is done.\n",
+    "    # (e.g. running notebook interactively)\n",
+    "    sources_l = [(f\"{karabo_id_control}*\", \"*\")]\n",
+    "    sources_l += [(instrument_src.format(m), \"*\") for m in modules]\n",
+    "    image_dc = run_dict[\"dc\"].select(sources_l, require_all=True)\n",
+    "    # validate that there are trains and that data sources are\n",
+    "    # present for any of the selected modules.\n",
+    "    if (\n",
+    "        len(image_dc.train_ids) == 0 or\n",
+    "        not np.any([\n",
+    "            karabo_id in s for s in run_dict[\"dc\"].select(sources_l, require_all=True).all_sources])  # noqa\n",
+    "    ):\n",
+    "        raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n",
+    "\n",
+    "    max_trains_list.append(len(image_dc.train_ids))\n",
+    "\n",
+    "    # update run_dc with selected module sources\n",
+    "    run_dict[\"dc\"] = image_dc\n",
+    "\n",
+    "# Update modules and karabo_da lists based on available modules to process.\n",
+    "modules = [m for m in modules if m not in missing_modules]\n",
+    "karabo_da = create_karabo_da_list(modules)\n",
+    "\n",
+    "# Modifying run data collections again to display the actual total number and size of files.\n",
+    "for run_dict in runs_dict.values():\n",
+    "    file_sizes = [os.path.getsize(f.filename) / 1e9 for f in run_dict[\"dc\"].deselect(f\"{karabo_id_control}*\").files]\n",
+    "    total_file_sizes += sum(file_sizes)\n",
+    "    n_files += len(file_sizes)\n",
+    "\n",
+    "print(f\"Will process data in a total of {n_files} files ({total_file_sizes:.02f} GB).\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Read and validate the runs control data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_run_conditions(runs_dict: dict):\n",
+    "    agipd_cond = AgipdCtrl(\n",
+    "        run_dc=runs_dict[\"dc\"],\n",
+    "        image_src=instrument_src_mod,\n",
+    "        ctrl_src=ctrl_src,\n",
+    "    )\n",
+    "    cond_dict[\"runs\"].append(runs_dict[\"number\"])\n",
+    "    if acq_rate == 0:\n",
+    "        cond_dict[\"acq_rate\"].append(agipd_cond.get_acq_rate())\n",
+    "    if mem_cells == 0:\n",
+    "        cond_dict[\"mem_cells\"].append(agipd_cond.get_num_cells())\n",
+    "    if gain_setting == -1:    \n",
+    "        cond_dict[\"gain_setting\"].append(\n",
+    "            agipd_cond.get_gain_setting(creation_time))\n",
+    "    if bias_voltage == 0.:\n",
+    "        cond_dict[\"bias_voltage\"].append(\n",
+    "            agipd_cond.get_bias_voltage(karabo_id_control))\n",
+    "    if integration_time == -1:\n",
+    "        cond_dict[\"integration_time\"].append(\n",
+    "            agipd_cond.get_integration_time())\n",
+    "    if gain_mode == -1:\n",
+    "        cond_dict[\"gain_mode\"].append(agipd_cond.get_gain_mode())\n",
     "    else:\n",
-    "        try:\n",
-    "            # extract gain setting and validate that all runs have the same setting\n",
-    "            gsettings = []\n",
-    "            for r in runs:\n",
-    "                control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, r, r,\n",
-    "                                                                             karabo_da_control)\n",
-    "                gsettings.append(get_gain_setting(control_fname, h5path_ctrl))\n",
-    "            if not all(g == gsettings[0] for g in gsettings):\n",
-    "                raise ValueError(f\"Different gain settings for the 3 input runs {gsettings}\")\n",
-    "            gain_setting = gsettings[0]\n",
-    "        except Exception as e:\n",
-    "            print(f'Error while reading gain setting from: \\n{control_fname}')\n",
-    "            print(f'Error: {e}')\n",
-    "            if \"component not found\" in str(e):\n",
-    "                print(\"Gain setting is not found in the control information\")\n",
-    "            print(\"Data will not be processed\")\n",
-    "            sequences = []"
+    "        cond_dict[\"gain_mode\"].append(AgipdGainMode(gain_mode))"
    ]
   },
   {
@@ -250,26 +293,116 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if karabo_da[0] == '-1':\n",
-    "    if modules[0] == -1:\n",
-    "        modules = list(range(nmods))\n",
-    "    karabo_da = [\"AGIPD{:02d}\".format(i) for i in modules]\n",
-    "else:\n",
-    "    modules = [int(x[-2:]) for x in karabo_da]\n",
-    "h5path = h5path.format(karabo_id, receiver_id)\n",
-    "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n",
-    "\n",
-    "if bias_voltage == 0:\n",
-    "    # Read the bias voltage from files, if recorded.\n",
-    "    # If not available, make use of the historical voltage the detector is running at\n",
-    "    bias_voltage = get_bias_voltage(control_names[0], karabo_id_control)\n",
-    "    bias_voltage = bias_voltage if bias_voltage is not None else 300\n",
+    "def validate_gain_modes(gain_modes: List[AgipdGainMode]):\n",
+    "    # Validate that gain modes are not a mix of adaptive and fixed gain.\n",
+    "    if all(\n",
+    "        gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes\n",
+    "    ):\n",
+    "        fixed_gain_mode = False\n",
+    "    elif any(\n",
+    "        gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes\n",
+    "    ):\n",
+    "        raise ValueError(\n",
+    "            f\"ERROR: Given runs {self.read_conditions['run_number']}\"\n",
+    "            \" have a mix of ADAPTIVE and FIXED gain modes: \"\n",
+    "            f\"{self.read_conditions['gain_mode']}.\"\n",
+    "    )\n",
+    "    else:\n",
+    "        fixed_gain_mode = True\n",
+    "    return fixed_gain_mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read slow data from 1st channel only.\n",
+    "# Read all modules in one notebook and validate the conditions across detectors?\n",
+    "# Currently slurm jobs run per one module.\n",
+    "\n",
+    "# TODO: what if first module is not available. Maybe only channel 2 available\n",
+    "instrument_src_mod = instrument_src.format(modules[0])\n",
+    "\n",
+    "cond_dict = dict()\n",
+    "fixed_gain_mode = None\n",
+    "\n",
+    "with multiprocessing.Manager() as manager:\n",
+    "    cond_dict[\"runs\"] = manager.list()\n",
+    "    cond_dict[\"acq_rate\"] = manager.list()\n",
+    "    cond_dict[\"mem_cells\"] = manager.list()\n",
+    "    cond_dict[\"gain_setting\"] = manager.list()\n",
+    "    cond_dict[\"gain_mode\"] = manager.list()\n",
+    "    cond_dict[\"bias_voltage\"] = manager.list()\n",
+    "    cond_dict[\"integration_time\"] = manager.list()\n",
+    "\n",
+    "    with multiprocessing.Pool(processes=len(modules)) as pool:\n",
+    "        pool.starmap(read_run_conditions, zip(runs_dict.values()))\n",
+    "\n",
+    "    for cond, vlist in cond_dict.items():\n",
+    "        if cond == \"runs\":\n",
+    "            continue\n",
+    "        elif cond == \"gain_mode\":\n",
+    "            fixed_gain_mode = validate_gain_modes(cond_dict[\"gain_mode\"])\n",
+    "        if not all(x == vlist[0] for x in vlist):\n",
+    "            # TODO: raise ERROR??\n",
+    "            print(\n",
+    "                f\"WARNING: {cond} is not the same for the runs \"\n",
+    "                f\"{cond_dict['runs']} with values\"\n",
+    "                f\" of {cond_dict[cond]}, respectively.\"\n",
+    "            )\n",
+    "    if cond_dict[\"acq_rate\"]: acq_rate = cond_dict[\"acq_rate\"][0]\n",
+    "    if cond_dict[\"mem_cells\"]: mem_cells = cond_dict[\"mem_cells\"][0]\n",
+    "    if cond_dict[\"gain_setting\"]: gain_setting = cond_dict[\"gain_setting\"][0]\n",
+    "    if cond_dict[\"gain_mode\"]: gain_mode = list(cond_dict[\"gain_mode\"])\n",
+    "    if cond_dict[\"bias_voltage\"]: bias_voltage = cond_dict[\"bias_voltage\"][0]\n",
+    "    if cond_dict[\"integration_time\"]: integration_time = cond_dict[\"integration_time\"][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Determine the gain operation mode based on the gain_mode stored in control h5file.\n",
+    "if operation_mode not in (\"ADAPTIVE_GAIN\", \"FIXED_GAIN\"):\n",
+    "    print(f\"WARNING: unknown operation_mode \\\"{operation_mode}\\\" parameter set\")\n",
     "\n",
+    "if (\n",
+    "    gain_mode == [\n",
+    "        AgipdGainMode.FIXED_HIGH_GAIN,\n",
+    "        AgipdGainMode.FIXED_MEDIUM_GAIN,\n",
+    "        AgipdGainMode.FIXED_LOW_GAIN\n",
+    "    ] and\n",
+    "    operation_mode == \"ADAPTIVE_GAIN\"\n",
+    "):\n",
+    "    print(\n",
+    "        \"WARNING: operation_mode parameter is ADAPTIVE_GAIN, \"\n",
+    "        \"slow data indicates FIXED_GAIN.\")\n",
+    "elif not fixed_gain_mode and operation_mode == \"FIXED_GAIN\":\n",
+    "    print(\n",
+    "        \"WARNING: operation_mode parameter is FIXED_GAIN, \"\n",
+    "        \"slow data indicates ADAPTIVE_GAIN\")\n",
+    "elif not all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_mode):\n",
+    "    raise ValueError(\n",
+    "        \"ERROR: Wrong arrangment of given dark runs. \"\n",
+    "        f\"Given runs' gain_modes are {gain_mode} for runs: {runs}.\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "print(\"Parameters are:\")\n",
     "print(f\"Proposal: {prop}\")\n",
-    "print(f\"Memory cells: {mem_cells}/{max_cells}\")\n",
-    "print(\"Runs: {}\".format([v for v in offset_runs.values()]))\n",
-    "print(f\"Sequences: {sequences if sequences else 'All'}\")\n",
+    "print(f\"Acquisition rate: {acq_rate}\")\n",
+    "print(f\"Memory cells: {mem_cells}\")\n",
+    "print(f\"Runs: {run_numbers}\")\n",
     "print(f\"Interlaced mode: {interlaced}\")\n",
     "print(f\"Using DB: {db_output}\")\n",
     "print(f\"Input: {in_folder}\")\n",
@@ -315,56 +448,28 @@
     "    ]"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The following lines will create a queue of files which will the be executed module-parallel. Distiguishing between different gains."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# set everything up filewise\n",
-    "os.makedirs(out_folder, exist_ok=True)\n",
-    "gain_mapped_files, _, total_file_size = map_gain_stages(\n",
-    "    in_folder, offset_runs, path_template, karabo_da, sequences\n",
-    ")\n",
-    "# TODO: Keep this commented out to use it later again, this is false information at the moment.\n",
-    "# print(f\"Will process a total of {total_files} files ({total_file_size:.02f} GB).\")\n",
-    "\n",
-    "# TODO: Remove all of this nonsense with Extra-data.\n",
-    "inp = []\n",
-    "for gain_index, (gain, qm_file_map) in enumerate(gain_mapped_files.items()):\n",
-    "    selected_inp = None\n",
-    "    for module_index in modules:\n",
-    "        max_n_imgs = 0\n",
-    "        qm = module_index_to_qm(module_index)\n",
-    "        if qm not in qm_file_map:\n",
-    "            print(f\"Did not find files for {qm}\")\n",
-    "            continue\n",
-    "        file_queue = qm_file_map[qm]\n",
-    "        while not file_queue.empty():\n",
-    "            filename = file_queue.get()\n",
-    "            # TODO: remove after using EXtra-data to read files\n",
-    "            # and skip empty trains.\n",
-    "            with h5py.File(filename, \"r\") as fin:\n",
-    "                n_imgs = fin[h5path.format(module_index)+\"/trainId\"].shape[0]\n",
-    "                if n_imgs != 0 and n_imgs > max_n_imgs:\n",
-    "                    selected_inp = (filename, module_index, gain_index)\n",
-    "                    max_n_imgs = n_imgs\n",
-    "        print(f\"Process {selected_inp[0]} for {qm}\")\n",
-    "        inp.append(selected_inp)\n",
-    "    if selected_inp is None:\n",
-    "        raise ValueError(\n",
-    "            \"No images to process for run: \"\n",
-    "            f\"{[v for v in offset_runs.values()][gain_index]}\"\n",
-    "        )\n",
-    "\n",
-    "total_files = len(inp)"
+    "# Check if max_trains can be processed.\n",
+    "\n",
+    "# more relevant if running on multiple modules (i.e. within notebook)\n",
+    "# mem_cells * gains * n_constants * modules * agipd_[x,y]image_size * 2\n",
+    "av_mem = psutil.virtual_memory().available\n",
+    "possible_trains = av_mem // (352 * 3 * 3 * len(modules) * 131072 * 2)\n",
+    "if max_trains == 0:\n",
+    "    max_trains = max(max_trains_list)\n",
+    "if max_trains > possible_trains:\n",
+    "    max_trains = possible_trains\n",
+    "    print(\n",
+    "        f\"WARNING: available memory for processing is { av_mem / 1e9:.02f} GB.\"\n",
+    "        f\" Modifing max_trains to process to {max_trains}\")\n",
+    "\n",
+    "for run_dict in runs_dict.values():\n",
+    "    run_dict[\"dc\"] = run_dict[\"dc\"].select_trains(np.s_[:max_trains])"
    ]
   },
   {
@@ -382,49 +487,31 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# min() only relevant if running on multiple modules (i.e. within notebook)\n",
-    "parallel_num_procs = min(12, total_files)\n",
+    "parallel_num_procs = min(12, len(modules)*3)\n",
     "parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs\n",
     "print(f\"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each\")\n",
     "\n",
-    "\n",
     "def characterize_module(\n",
-    "    fast_data_filename: str, channel: int, gain_index: int\n",
-    ") -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:\n",
-    "    if max_cells == 0:\n",
-    "        num_cells = get_num_cells(fast_data_filename, karabo_id, channel)\n",
-    "    else:\n",
-    "        num_cells = max_cells\n",
+    "    channel: int, runs_dict: dict,\n",
+    ") -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:\n",
     "\n",
-    "    if acq_rate == 0.:\n",
-    "        slow_paths = control_names[gain_index], karabo_id_control\n",
-    "        fast_paths = fast_data_filename, karabo_id, channel\n",
-    "        local_acq_rate = get_acq_rate(fast_paths, slow_paths)\n",
-    "    else:\n",
-    "        local_acq_rate = acq_rate\n",
+    "    # Select the corresponding module channel.\n",
+    "    instrument_src_mod = instrument_src.format(channel)\n",
     "\n",
-    "    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]\n",
-    "    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]\n",
+    "    run_dc = runs_dict[\"dc\"]\n",
+    "    gain_index = runs_dict[\"gain\"]\n",
     "\n",
-    "    h5path_f = h5path.format(channel)\n",
-    "    h5path_idx_f = h5path_idx.format(channel)\n",
+    "    if run_dc[instrument_src_mod, \"image.data\"].shape[0] < min_trains:\n",
+    "        print(\n",
+    "            f\"WARNING: {run_dc.files} have less than \"\n",
+    "            \"minimum trains: {min_trains}.\")\n",
     "\n",
-    "    with h5py.File(fast_data_filename, \"r\") as infile:\n",
-    "        if rawversion == 2:\n",
-    "            count = np.squeeze(infile[f\"{h5path_idx_f}/count\"])\n",
-    "            first = np.squeeze(infile[f\"{h5path_idx_f}/first\"])\n",
-    "            last_index = int(first[count != 0][-1]+count[count != 0][-1])\n",
-    "            first_index = int(first[count != 0][0])\n",
-    "        else:\n",
-    "            status = np.squeeze(infile[f\"{h5path_idx_f}/status\"])\n",
-    "            if np.count_nonzero(status != 0) == 0:\n",
-    "                return\n",
-    "            last = np.squeeze(infile[f\"{h5path_idx_f}/last\"])\n",
-    "            first = np.squeeze(infile[f\"{h5path_idx_f}/first\"])\n",
-    "            last_index = int(last[status != 0][-1]) + 1\n",
-    "            first_index = int(first[status != 0][0])\n",
-    "        im = np.array(infile[f\"{h5path_f}/data\"][first_index:last_index,...])\n",
-    "        cell_ids = np.squeeze(infile[f\"{h5path_f}/cellId\"][first_index:last_index,...])\n",
+    "    # Read module's image and cellId data.\n",
+    "    im = run_dc[instrument_src_mod, \"image.data\"].ndarray()\n",
+    "    cell_ids = np.squeeze(run_dc[instrument_src_mod, \"image.cellId\"].ndarray())\n",
+    "\n",
+    "    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]\n",
+    "    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]    \n",
     "\n",
     "    if interlaced:\n",
     "        if not fixed_gain_mode:\n",
@@ -435,13 +522,12 @@
     "        if not fixed_gain_mode:\n",
     "            ga = im[:, 1, ...]\n",
     "        im = im[:, 0, ...].astype(np.float32)\n",
-    "\n",
     "    im = np.transpose(im)\n",
     "    if not fixed_gain_mode:\n",
     "        ga = np.transpose(ga)\n",
     "\n",
     "    context = psh.context.ThreadContext(num_workers=parallel_num_threads)\n",
-    "    offset = context.alloc(shape=(im.shape[0], im.shape[1], num_cells), dtype=np.float64)\n",
+    "    offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)\n",
     "    noise = context.alloc(like=offset)\n",
     "\n",
     "    if fixed_gain_mode:\n",
@@ -460,7 +546,6 @@
     "            ga_slice = ga[..., cell_slice_index]\n",
     "            gains[..., cell_number] = np.median(ga_slice, axis=2)\n",
     "            gains_std[..., cell_number] = np.std(ga_slice, axis=2)\n",
-    "\n",
     "    context.map(process_cell, np.unique(cell_ids))\n",
     "\n",
     "    # bad pixels\n",
@@ -483,7 +568,7 @@
     "    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD\n",
     "    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR\n",
     "\n",
-    "    return offset, noise, gains, gains_std, bp, num_cells, local_acq_rate"
+    "    return channel, gain_index, offset, noise, gains, gains_std, bp"
    ]
   },
   {
@@ -493,7 +578,18 @@
    "outputs": [],
    "source": [
     "with multiprocessing.Pool(processes=parallel_num_procs) as pool:\n",
-    "    results = pool.starmap(characterize_module, inp)"
+    "    results = pool.starmap(\n",
+    "        characterize_module, itertools.product(modules, list(runs_dict.values())))\n",
+    "\n",
+    "# mapped values for processing 2 modules example:\n",
+    "# [\n",
+    "#     0, {\"gain\": 0, \"run_number\": <run-high>, \"dc\": <high-dc>},\n",
+    "#     0, {\"gain\": 1, \"run_number\": <run-med>, \"dc\": <med-dc>},\n",
+    "#     0, {\"gain\": 2, \"run_number\": <run-low>, \"dc\": <low-dc>},\n",
+    "#     1, {\"gain\": 0, \"run_number\": <run-high>, \"dc\": <high-dc>},\n",
+    "#     1, {\"gain\": 1, \"run_number\": <run-med>, \"dc\": <med-dc>},\n",
+    "#     1, {\"gain\": 2, \"run_number\": <run-low>, \"dc\": <low-dc>},\n",
+    "# ]"
    ]
   },
   {
@@ -509,13 +605,8 @@
     "    gain_g = OrderedDict()\n",
     "    gainstd_g = OrderedDict()\n",
     "\n",
-    "all_cells = []\n",
-    "all_acq_rate = []\n",
     "\n",
-    "for (_, module_index, gain_index), (offset, noise, gains, gains_std, bp,\n",
-    "                                    thiscell, thisacq) in zip(inp, results):\n",
-    "    all_cells.append(thiscell)\n",
-    "    all_acq_rate.append(thisacq)\n",
+    "for module_index, gain_index, offset, noise, gains, gains_std, bp in results:\n",
     "    qm = module_index_to_qm(module_index)\n",
     "    if qm not in offset_g:\n",
     "        offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))\n",
@@ -530,13 +621,7 @@
     "    badpix_g[qm][..., gain_index] = bp\n",
     "    if not fixed_gain_mode:\n",
     "        gain_g[qm][..., gain_index] = gains\n",
-    "        gainstd_g[qm][..., gain_index] = gains_std\n",
-    "\n",
-    "\n",
-    "max_cells = np.max(all_cells)\n",
-    "print(f\"Using {max_cells} memory cells\")\n",
-    "acq_rate = np.max(all_acq_rate)\n",
-    "print(f\"Using {acq_rate} MHz acquisition rate\")"
+    "        gainstd_g[qm][..., gain_index] = gains_std"
    ]
   },
   {
@@ -598,19 +683,6 @@
     "        res[qm]['ThresholdsDark'] = thresholds_g[qm]"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Read report path and create file location tuple to add with the injection\n",
-    "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n",
-    "file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)\n",
-    "\n",
-    "report = get_report(out_folder)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -620,7 +692,7 @@
     "# set the operating condition\n",
     "# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None\n",
     "condition = iCalibrationDB.Conditions.Dark.AGIPD(\n",
-    "    memory_cells=max_cells,\n",
+    "    memory_cells=mem_cells,\n",
     "    bias_voltage=bias_voltage,\n",
     "    acquisition_rate=acq_rate,\n",
     "    gain_setting=gain_setting,\n",
@@ -643,7 +715,7 @@
     "    constant=iCalibrationDB.CalibrationConstant(),\n",
     "    condition=condition,\n",
     "    cal_db_interface=cal_db_interface,\n",
-    "    snapshot_at=creation_time.isoformat(),\n",
+    "    snapshot_at=creation_time.isoformat() if creation_time else None,\n",
     "    timeout=cal_db_timeout\n",
     ")\n",
     "for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):\n",
@@ -654,6 +726,13 @@
     "    }"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sending calibration constants to the database."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -678,11 +757,18 @@
     "                                  file_loc, report, creation_time, out_folder)\n",
     "            print(f\"Calibration constant {const} for {qm} is stored locally in {file_loc}.\\n\")\n",
     "\n",
-    "    print(\"Constants parameter conditions are:\\n\")\n",
-    "    print(f\"• memory_cells: {max_cells}\\n• bias_voltage: {bias_voltage}\\n\"\n",
-    "          f\"• acquisition_rate: {acq_rate}\\n• gain_setting: {gain_setting}\\n\"\n",
-    "          f\"• gain_mode: {fixed_gain_mode}\\n• integration_time: {integration_time}\\n\"\n",
-    "          f\"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\\n\")"
+    "print(\"Constants parameter conditions are:\\n\")\n",
+    "print(f\"• memory_cells: {mem_cells}\\n• bias_voltage: {bias_voltage}\\n\"\n",
+    "      f\"• acquisition_rate: {acq_rate}\\n• gain_setting: {gain_setting}\\n\"\n",
+    "      f\"• gain_mode: {fixed_gain_mode}\\n• integration_time: {integration_time}\\n\"\n",
+    "      f\"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Retrieving previous calibration constants for comparison."
    ]
   },
   {
@@ -698,11 +784,6 @@
     "def retrieve_old_constant(qm, const):\n",
     "    dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()\n",
     "\n",
-    "    # This should be used in case of running notebook\n",
-    "    # by a different method other than myMDC which already\n",
-    "    # sends CalCat info.\n",
-    "    # TODO: Set db_module to \"\" by default in the first cell\n",
-    "\n",
     "    data, mdata = get_from_db(\n",
     "        karabo_id=karabo_id,\n",
     "        karabo_da=qm_dict[qm][\"karabo_da\"],\n",
@@ -710,7 +791,7 @@
     "        condition=condition,\n",
     "        empty_constant=None,\n",
     "        cal_db_interface=cal_db_interface,\n",
-    "        creation_time=creation_time-timedelta(seconds=1),\n",
+    "        creation_time=creation_time-timedelta(seconds=1) if creation_time else None,\n",
     "        strategy=\"pdu_prior_in_time\",\n",
     "        verbosity=1,\n",
     "        timeout=cal_db_timeout\n",
@@ -776,7 +857,7 @@
    "source": [
     "cell = 3\n",
     "gain = 0\n",
-    "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*offset_runs.values()))"
+    "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*run_numbers))"
    ]
   },
   {
@@ -794,7 +875,7 @@
    "source": [
     "cell = 3\n",
     "gain = 1\n",
-    "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*offset_runs.values()))"
+    "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*run_numbers))"
    ]
   },
   {
@@ -812,7 +893,7 @@
    "source": [
     "cell = 3\n",
     "gain = 2\n",
-    "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*offset_runs.values()))"
+    "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*run_numbers))"
    ]
   },
   {
@@ -856,6 +937,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "\n",
     "## Aggregate values, and per Cell behaviour ##\n",
     "\n",
     "The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior."
@@ -867,7 +949,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "create_constant_overview(offset_g, \"Offset (ADU)\", max_cells, 4000, 8000,\n",
+    "create_constant_overview(offset_g, \"Offset (ADU)\", mem_cells, 4000, 8000,\n",
     "                         badpixels=[badpix_g, np.nan])"
    ]
   },
@@ -877,7 +959,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "create_constant_overview(noise_g, \"Noise (ADU)\", max_cells, 0, 100,\n",
+    "create_constant_overview(noise_g, \"Noise (ADU)\", mem_cells, 0, 100,\n",
     "                         badpixels=[badpix_g, np.nan])"
    ]
   },
@@ -895,7 +977,7 @@
     "        bp_thresh[mod][...,:2] = con[...,:2]\n",
     "        bp_thresh[mod][...,2:] = con\n",
     "\n",
-    "    create_constant_overview(thresholds_g, \"Threshold (ADU)\", max_cells, 4000, 10000, 5,\n",
+    "    create_constant_overview(thresholds_g, \"Threshold (ADU)\", mem_cells, 4000, 10000, 5,\n",
     "                             badpixels=[bp_thresh, np.nan],\n",
     "                             gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],\n",
     "                             marker=['d','d','','','']\n",
@@ -911,7 +993,7 @@
     "bad_pixel_aggregate_g = OrderedDict()\n",
     "for m, d in badpix_g.items():\n",
     "    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)\n",
-    "create_constant_overview(bad_pixel_aggregate_g, \"Bad pixel fraction\", max_cells, 0, 0.10, 3)"
+    "create_constant_overview(bad_pixel_aggregate_g, \"Bad pixel fraction\", mem_cells, 0, 0.10, 3)"
    ]
   },
   {
@@ -1120,7 +1202,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.11"
+   "version": "3.8.12"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb
index 021e16ec615b20c9ecf79b9570d87bcd0387f2c0..8854d0f5c6d0ca5791829d12ecd84c775bbbd77a 100644
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb
@@ -22,13 +22,8 @@
     "run = 449 # runs of image data used to create histograms\n",
     "\n",
     "karabo_id = \"MID_DET_AGIPD1M-1\" # karabo karabo_id\n",
-    "receiver_id = \"{}CH0\" # inset for receiver devices\n",
-    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
-    "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n",
-    "h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n",
-    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n",
-    "karabo_id_control = \"MID_IRU_AGIPD1M1\" # karabo-id for control device\n",
-    "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n",
+    "ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information\n",
+    "karabo_id_control = \"MID_EXP_AGIPD1M1\" # karabo-id for control device\n",
     "\n",
     "use_dir_creation_date = True # use the creation data of the input dir for database queries\n",
     "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n",
@@ -56,9 +51,9 @@
     "\n",
     "# Detector conditions\n",
     "max_cells = 0 # number of memory cells used, set to 0 to automatically infer\n",
-    "bias_voltage = 300 # Bias voltage\n",
+    "bias_voltage = 0. # Bias voltage, use 0 to try to auto-determine\n",
     "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n",
-    "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n",
+    "gain_setting = -1 # the gain setting, use -1 to try to auto-determine\n",
     "photon_energy = 8.05 # photon energy in keV\n",
     "integration_time = -1 # integration time, negative values for auto-detection."
    ]
@@ -80,13 +75,7 @@
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import tabulate\n",
-    "from cal_tools.agipdlib import (\n",
-    "    get_acq_rate,\n",
-    "    get_bias_voltage,\n",
-    "    get_gain_setting,\n",
-    "    get_integration_time,\n",
-    "    get_num_cells,\n",
-    ")\n",
+    "from cal_tools.agipdlib import AgipdCtrl\n",
     "from cal_tools.agipdutils_ff import (\n",
     "    BadPixelsFF,\n",
     "    any_in,\n",
@@ -104,7 +93,7 @@
     "    send_to_db\n",
     ")\n",
     "from dateutil import parser\n",
-    "from extra_data import RunDirectory, stack_detector_data\n",
+    "from extra_data import H5File, RunDirectory, stack_detector_data\n",
     "from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry\n",
     "from iCalibrationDB import Conditions, Constants, Detectors\n",
     "from iminuit import Minuit\n",
@@ -132,45 +121,38 @@
    "outputs": [],
    "source": [
     "# Get operation conditions\n",
-    "filename = glob.glob(f\"{raw_folder}/r{run:04d}/*-AGIPD[0-1][0-9]-*\")[0]\n",
-    "channel = int(re.findall(r\".*-AGIPD([0-9]+)-.*\", filename)[0])\n",
-    "control_fname = f'{raw_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n",
-    "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n",
-    "\n",
-    "# Evaluate number of memory cells\n",
-    "mem_cells = get_num_cells(filename, karabo_id, channel)\n",
-    "if mem_cells is None:\n",
-    "    raise ValueError(f\"No raw images found in {filename}\")\n",
+    "ctrl_source = ctrl_source_template.format(karabo_id_control)\n",
     "\n",
-    "# Evaluate aquisition rate\n",
-    "fast_paths = (filename, karabo_id, channel)\n",
-    "slow_paths = (control_fname, karabo_id_control)\n",
-    "\n",
-    "if acq_rate == 0.:\n",
-    "    acq_rate = get_acq_rate(fast_paths,slow_paths)\n",
+    "raw_dc = RunDirectory(f'{raw_folder}/r{run:04d}/')\n",
     "\n",
+    "# Read operating conditions from AGIPD00 files\n",
+    "instrument_src_mod = [\n",
+    "    s for s in list(raw_dc.all_sources) if \"0CH\" in s][0]\n",
+    "ctrl_src = [\n",
+    "    s for s in list(raw_dc.all_sources) if ctrl_source in s][0]\n",
     "# Evaluate creation time\n",
     "creation_time = None\n",
     "if use_dir_creation_date:\n",
     "    creation_time = get_dir_creation_date(raw_folder, run)\n",
-    "    \n",
-    "# Evaluate gain setting\n",
-    "if gain_setting == 0.1:\n",
-    "    if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n",
-    "        print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n",
-    "        gain_setting = None\n",
-    "    else:\n",
-    "        try:\n",
-    "            gain_setting = get_gain_setting(control_fname, h5path_ctrl)\n",
-    "        except Exception as e:\n",
-    "            print(f'Error while reading gain setting from: \\n{control_fname}')\n",
-    "            print(e)\n",
-    "            print(\"Set gain settion to 0\")\n",
-    "            gain_setting = 0\n",
-    "\n",
-    "# Evaluate integration time\n",
-    "if integration_time < 0:\n",
-    "    integration_time = get_integration_time(control_fname, h5path_ctrl)\n",
+    "\n",
+    "agipd_cond = AgipdCtrl(\n",
+    "    run_dc=raw_dc,\n",
+    "    image_src=instrument_src_mod,\n",
+    "    ctrl_src=ctrl_src,\n",
+    "    raise_error=False,  # to be able to process very old data without gain setting value\n",
+    ")\n",
+    "\n",
+    "mem_cells = agipd_cond.get_num_cells()\n",
+    "if mem_cells is None:\n",
+    "    raise ValueError(f\"No raw images found in {raw_dc[instrument_src_mod].files}\")\n",
+    "if acq_rate == 0.:\n",
+    "    acq_rate = agipd_cond.get_acq_rate()\n",
+    "if gain_setting == -1:\n",
+    "    gain_setting = agipd_cond.get_gain_setting(creation_time)\n",
+    "if bias_voltage == 0.:\n",
+    "    bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)\n",
+    "if integration_time == -1:\n",
+    "    integration_time = agipd_cond.get_integration_time()\n",
     "\n",
     "# Evaluate detector instance for mapping\n",
     "instrument = karabo_id.split(\"_\")[0]\n",
diff --git a/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb b/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb
index 6fbfb3d357a1bbf6fc04212144c1ee7b2227efcb..e204cf3f605f20c7342638395402119b7f8e7864 100644
--- a/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb
+++ b/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb
@@ -6,8 +6,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Author: K. Ahmed, M. Karnevsky, Version: 0.1\n",
-    "#The following is a summary for the processing of dark images and calibration constants production.\n",
+    "# Author: European XFEL Detector Group, Version: 1.0\n",
+    "\n",
+    "# Summary of processed dark calibration constants and a comparison with previously injected constants.\n",
     "\n",
     "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/fixed_gain/SPB_summary_fix2\" # path to output to, required\n",
     "karabo_id = \"SPB_DET_AGIPD1M-1\" # detector instance\n",
@@ -25,7 +26,6 @@
     "import os\n",
     "import warnings\n",
     "from collections import OrderedDict\n",
-    "from datetime import datetime\n",
     "from pathlib import Path\n",
     "\n",
     "warnings.filterwarnings('ignore')\n",
@@ -40,7 +40,6 @@
     "\n",
     "matplotlib.use(\"agg\")\n",
     "import matplotlib.gridspec as gridspec\n",
-    "import matplotlib.patches as patches\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "%matplotlib inline\n",
@@ -49,8 +48,6 @@
     "from cal_tools.ana_tools import get_range\n",
     "from cal_tools.plotting import show_processed_modules\n",
     "from cal_tools.tools import CalibrationMetadata, module_index_to_qm\n",
-    "from iCalibrationDB import Detectors\n",
-    "from XFELDetAna.plotting.heatmap import heatmapPlot\n",
     "from XFELDetAna.plotting.simpleplot import simplePlot"
    ]
   },
diff --git a/setup.py b/setup.py
index 60090e3420b1fdcbd78e5bc250c44452ab17bab2..e73c52388c666bcf62a753bf39d580c60c70b61e 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ install_requires = [
         "dill==0.3.0",
         "docutils==0.17.1",
         "dynaconf==3.1.4",
-        "extra_data==1.8.0",
+        "extra_data==1.9.1",
         "extra_geom==1.6.0",
         "gitpython==3.1.0",
         "h5py==3.5.0",
@@ -78,6 +78,7 @@ install_requires = [
         "pasha==0.1.0",
         "prettytable==0.7.2",
         "princess==0.5",
+        "psutil==5.9.0",
         "pypandoc==1.4",
         "python-dateutil==2.8.1",
         "pyyaml==5.3",
diff --git a/src/cal_tools/agipdlib.py b/src/cal_tools/agipdlib.py
index ec6b4e25c7fe896a6e5b7fa2b99408ccb12ae627..a39ca0ea0ab304fad6d35c8f2be7b25bd0193317 100644
--- a/src/cal_tools/agipdlib.py
+++ b/src/cal_tools/agipdlib.py
@@ -1,15 +1,14 @@
 import os
 import posixpath
-import traceback
 import zlib
 from multiprocessing.pool import ThreadPool
-from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
 import h5py
 import numpy as np
 import sharedmem
-from extra_data import DataCollection
+from dateutil import parser
+from extra_data import DataCollection, H5File, by_id, components
 from iCalibrationDB import Conditions, Constants
 
 from cal_tools import agipdalgs as calgs
@@ -27,10 +26,35 @@ from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
 from cal_tools.h5_copy_except import h5_copy_except_paths
 from cal_tools.tools import get_constant_from_db_and_time
 
+class AgipdCtrl:
+    def __init__(
+        self,
+        run_dc: DataCollection,
+        image_src: str,
+        ctrl_src: str,
+        raise_error: bool = True,
+    ):
+        """
+        Initialize AgipdCtrl class to read all required AGIPD parameters.
 
-def get_num_cells(fname, loc, module):
-    with h5py.File(fname, "r") as f:
-        cells = f[f"INSTRUMENT/{loc}/DET/{module}CH0:xtdf/image/cellId"][()]
+        :param image_src: H5 source for image data.
+        :param ctrl_src: H5 source for control (slow) data.
+        """
+        self.run_dc = run_dc
+        self.image_src = image_src
+        self.ctrl_src = ctrl_src
+
+        self.raise_error = raise_error
+
+    def get_num_cells(self) -> Optional[int]:
+        """
+        :return mem_cells: Number of memory cells.
+                          return None, if no data available.
+        """
+        cells = np.squeeze(
+            self.run_dc[
+                self.image_src, "image.cellId"].drop_empty_trains().ndarray()
+        )
         if cells.shape[0] == 0:
             return None
         maxcell = np.max(cells)
@@ -38,91 +62,77 @@ def get_num_cells(fname, loc, module):
         dists = [abs(o - maxcell) for o in options]
         return options[np.argmin(dists)]
 
+    def get_acq_rate(self) -> Optional[float]:
+        """Get the acquisition rate from said detector module.
+
+        If the data is available from the middlelayer FPGA_COMP device,
+        then it is retrieved from there.
+        If not, the rate is calculated from two different pulses time.
 
-def get_acq_rate(fast_paths: Tuple[str, str, int],
-                 slow_paths: Optional[Tuple[str, str]] = ('', '')
-                 ) -> Optional[float]:
-    """Get the acquisition rate from said detector module.
+        In that case the spacing between the first two pulse IDs of the
+        first non-empty train is mapped to the acquisition rate.
+
+        :return acq_rate: the acquisition rate.
+                          return None, if not available.
+        """
+        # Attempt to look for acquisition rate in slow data
+        rep_rate_src = (
+            self.ctrl_src, "bunchStructure.repetitionRate.value")
+
+        if (
+            rep_rate_src[0] in self.run_dc.all_sources and
+            rep_rate_src[1] in self.run_dc.keys_for_source(rep_rate_src[0])
+        ):
+            # The acquisition rate value is stored in a 1D array of type
+            # float.
+            # It is desired to lose precision here because the usage is
+            # about bucketing the rate for managing meta-data.
+
+            return round(float(self.run_dc[rep_rate_src].as_single_value()), 1)
+
+        train_pulses = np.squeeze(
+            self.run_dc[
+                self.image_src, "image.pulseId"
+            ].drop_empty_trains().train_from_index(0)[1]
+        )
 
-    If the data is available from the middlelayer FPGA_COMP device, then it is
-    retrieved from there. If not, the rate is calculated from two different
-    pulses time.
+        # Compute acquisition rate from fast data
+        diff = train_pulses[1] - train_pulses[0]
+        options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5}
+        return options.get(diff, None)
 
-    The first entry is deliberatly not used, as the detector just began
-    operating, and it might have skipped a train.
+    def get_gain_setting(
+        self,
+        creation_time: "datetime.datetime",
+    ) -> Optional[int]:
+        """Retrieve Gain setting.
 
-    :param slow_paths: in which file and h5 path to look for slow data.
-                       The first string is the filename with complete path,
-                       the second string is the key `karabo_id_control`
+        If the data is available from the middlelayer FPGA_COMP device,
+        then it is retrieved from there.
+        If not, the setting is calculated off `setupr` and `patternTypeIndex`
 
-    :param fast_paths: in which module file and h5 path to look for pulses.
-                       The first string is the filename with complete path,
-                       the second string is the module device name `karabo_id`,
-                       the third parameter is the module number, used to
-                       navigate through the h5 file structure.
+        gain-setting 1: setupr@dark=8, setupr@slopespc=40
+        gain-setting 0: setupr@dark=0, setupr@slopespc=32
 
-    :return acq_rate: the acquisition rate.
-                      If not found in either files, return None.
-    """
-    # Attempt to look for acquisition rate in slow data
-    slow_data_file, karabo_id_control = slow_paths
-    slow_data_file = Path(slow_data_file)
-    if slow_data_file.is_file():
-        slow_data_path = f'CONTROL/{karabo_id_control}/MDL/FPGA_COMP/bunchStructure/repetitionRate/value'  # noqa
-        with h5py.File(slow_data_file, "r") as fin:
-            if slow_data_path in fin:
-                # The acquisition rate value is stored in a 1D array of type
-                # float. Use the 3rd value, arbitrarily chosen.
-                # It is desired to loose precision here because the usage is
-                # about bucketing the rate for managing meta-data.
-                return round(float(fin[slow_data_path][3]), 1)
-
-    # Compute acquisition rate from fast data
-    fast_data_file, karabo_id, module = fast_paths
-    fast_data_file = Path(fast_data_file)
-    if fast_data_file.is_file():
-        fast_data_path = f'INSTRUMENT/{karabo_id}/DET/{module}CH0:xtdf/image/pulseId'  # noqa
-        with h5py.File(fast_data_file, "r") as fin:
-            if fast_data_path in fin:
-                # pulses is of shape (NNNN, 1), of type uint8.
-                # Squeeze out the data, and subtract the 3rd entry from the 2nd
-                # to get a rate.
-                pulses = np.squeeze(fin[fast_data_path][1:3])
-                diff = pulses[1] - pulses[0]
-                options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5}
-                return options.get(diff, None)
-
-
-def get_gain_setting(fname: str, h5path_ctrl: str) -> int:
-    """Retrieve Gain setting.
-
-    If the data is available from the middlelayer FPGA_COMP device, then it is
-    retrieved from there.
-    If not, the setting is calculated off `setupr` and `patternTypeIndex`
-
-    gain-setting 1: setupr@dark=8, setupr@slopespc=40
-    gain-setting 0: setupr@dark=0, setupr@slopespc=32
-
-    patternTypeIndex 1: High-gain
-    patternTypeIndex 2: Medium-gain
-    patternTypeIndex 3: Low-gain
-    patternTypeIndex 4: SlopesPC
-
-    :param fname: path to file with control information
-    :param h5path_ctrl: path to control information inside the file
-    :return: gain setting
-    """
-    gain_path = f'{h5path_ctrl}/gain/value'
-    with h5py.File(fname, "r") as fin:
-        if gain_path in fin:
-            return fin[gain_path][0]
+        patternTypeIndex 1: High-gain
+        patternTypeIndex 2: Medium-gain
+        patternTypeIndex 3: Low-gain
+        patternTypeIndex 4: SlopesPC
 
-        # Get the index at which the train is not zero.
-        train_id = fin["INDEX/trainId"][()]
-        idx = np.nonzero(train_id)[0][0]
+        :return: gain setting.
+                 return 0, if not available.
+        """
+        # TODO: remove after fixing get_possible_conditions
+        if creation_time and creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
+            print("Set gain-setting to None for runs taken before 2020-01-31")
+            return
+
+        if "gain.value" in self.run_dc.keys_for_source(self.ctrl_src):
+            return self.run_dc[self.ctrl_src, "gain.value"].as_single_value()
 
-        setupr = fin[f'{h5path_ctrl}/setupr/value'][idx]
-        pattern_type_idx = fin[f'{h5path_ctrl}/patternTypeIndex/value'][idx]
+        setupr = self.run_dc[self.ctrl_src, "setupr.value"].as_single_value()
+        pattern_type_idx = self.run_dc[
+            self.ctrl_src, "patternTypeIndex.value"].as_single_value()
 
         if (setupr == 0 and pattern_type_idx < 4) or (
                 setupr == 32 and pattern_type_idx == 4):
@@ -131,60 +141,85 @@ def get_gain_setting(fname: str, h5path_ctrl: str) -> int:
                 setupr == 40 and pattern_type_idx == 4):
             return 1
         else:
-            raise ValueError('Could not derive gain setting from setupr and patternTypeIndex')  # noqa
+            if self.raise_error:
+                raise ValueError(
+                    "Could not derive gain setting from"
+                    " setupr and patternTypeIndex"
+                )
 
+            print(
+                "WARNING: gain_setting is not available "
+                f"at source {self.ctrl_src}.\nSet gain_setting to 0.")
+            # TODO: why return 0 and not None?
+            return 0
 
-def get_gain_mode(fname: str, h5path_ctrl: str) -> AgipdGainMode:
-    """Returns the gain mode (adaptive or fixed) from slow data"""
+    def get_gain_mode(self) -> AgipdGainMode:
+        """Returns the gain mode (adaptive or fixed) from slow data"""
 
-    h5path_run = h5path_ctrl.replace("CONTROL/", "RUN/", 1)
-    h5path_gainmode = f'{h5path_run}/gainModeIndex/value'
-    with h5py.File(fname, "r") as fd:
-        if h5path_gainmode in fd:
-            return AgipdGainMode(fd[h5path_gainmode][0])
-    return AgipdGainMode.ADAPTIVE_GAIN
+        if (
+            self.ctrl_src in self.run_dc.all_sources and
+            "gainModeIndex.value" in self.run_dc.keys_for_source(
+                self.ctrl_src)
+        ):
+            return AgipdGainMode(int(
+                self.run_dc.get_run_value(
+                    self.ctrl_src, "gainModeIndex.value")))
 
+        return AgipdGainMode.ADAPTIVE_GAIN
 
-def get_bias_voltage(fname: str, karabo_id_control: str,
-                     module: Optional[int] = 0) -> int:
-    """Read the voltage information from the FPGA device of module 0.
+    def get_bias_voltage(
+        self,
+        karabo_id_control: str,
+        module: Optional[int] = 0
+    ) -> int:
+        """Read the voltage information from the FPGA device of module 0.
 
-    Different modules may operate at different voltages. In practice, they all
-    operate at the same voltage. As such, it is okay to read a single module's
-    value.
+        Different modules may operate at different voltages.
+        In practice, they all operate at the same voltage.
+        As such, it is okay to read a single module's value.
 
-    This value is read from slow data.
+        If the FPGA module source is not available, 300 will be returned.
+        300 is the default bias_voltage value before adding it to slow data.
 
-    If the file cannot be accessed, an OSError will be raised.
-    If the hdf5 path cannot be accessed, None will be returned.
+        :param karabo_id_control: The detector karabo id, for the control device.
+        :param module: defaults to module 0
+        :return: voltage, a uint16
+        """
+        voltage_src = (
+            f"{karabo_id_control}/FPGA/M_{module}",
+            "highVoltage.actual.value")
 
-    :param fname: path to slow data file with control information
-    :param karabo_id: The detector Karabo id, for creating the hdf5 path
-    :param module: defaults to module 0
-    :return: voltage, a uint16
-    """
-    voltage_path = f'/CONTROL/{karabo_id_control}/FPGA/M_{module}/highVoltage/actual/value'  # noqa
-    with h5py.File(fname, "r") as fin:
-        if voltage_path in fin:
-            return fin[voltage_path][0]
+        if (
+            voltage_src[0] in self.run_dc.all_sources and
+            voltage_src[1] in self.run_dc.keys_for_source(voltage_src[0])
+        ):
 
+            return self.run_dc[voltage_src].as_single_value(atol=1, reduce_by='max')
+        else:
+            print(
+                "WARNING: Unable to read bias_voltage from"
+                f" {voltage_src[0]}/{voltage_src[1].replace('.','/')} "
+                "Returning 300 as default bias voltage value."
+            )
+            return 300
 
-def get_integration_time(fname: str, h5path_ctrl: str) -> int:
-    """Read integration time from the FPGA device.
+    def get_integration_time(self) -> int:
+        """Read integration time from the FPGA device.
 
-    The integration time is specified as an integer number of clock
-    cycles each spanning ~9ns. The default (and legacy) value is 12.
+        The integration time is specified as an integer number of clock
+        cycles each spanning ~9ns. The default (and legacy) value is 12.
 
-    :param fname: path to file with control information
-    :param h5path_ctrl: path to control information inside the file
-    :return: integration time
-    """
-    h5path_run = h5path_ctrl.replace('CONTROL/', 'RUN/', 1)
-    h5path_time = f'{h5path_run}/integrationTime/value'
-    with h5py.File(fname, 'r') as fd:
-        if h5path_time in fd:
-            return int(fd[h5path_time][0])
-    return 12
+        :return: integration time
+        """
+        if (
+            self.ctrl_src in self.run_dc.all_sources and
+            'integrationTime.value' in self.run_dc.keys_for_source(
+                self.ctrl_src)
+        ):
+            return int(self.run_dc.get_run_value(
+                self.ctrl_src, 'integrationTime.value'))
+
+        return 12
 
 
 class CellSelection:
@@ -228,8 +263,8 @@ class AgipdCorrections:
         self,
         max_cells: int,
         cell_sel: CellSelection,
-        h5_data_path: str = "INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/",
-        h5_index_path: str = "INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/",
+        h5_data_path: str = "SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/",
+        h5_index_path: str = "SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/",
         corr_bools: Optional[dict] = None,
         gain_mode: AgipdGainMode = AgipdGainMode.ADAPTIVE_GAIN,
         comp_threads: int = 1,
@@ -361,70 +396,63 @@ class AgipdCorrections:
         """
         module_idx = int(file_name.split('/')[-1].split('-')[2][-2:])
         agipd_base = self.h5_data_path.format(module_idx)
-        idx_base = self.h5_index_path.format(module_idx)
         data_dict = self.shared_dict[i_proc]
         data_dict['moduleIdx'][0] = module_idx
-        try:
-            f = h5py.File(file_name, "r")
-
-            (valid, first_index, last_index,
-             train_ids, valid_indices) = self.get_valid_image_idx(idx_base, f)
-
-            if len(valid_indices) == 0:
-                # If there's not a single valid index, exit early.
-                data_dict['nImg'][0] = 0
-                return 0
-
-            # store valid trains in shared memory
-            valid_train_ids = train_ids[valid]
-            n_valid_trains = len(valid_train_ids)
-            data_dict["n_valid_trains"][0] = n_valid_trains
-            data_dict["valid_trains"][:n_valid_trains] = valid_train_ids
-
-            # get cell selection for the images in this file
-            cm = (self.cell_sel.CM_NONE if apply_sel_pulses
-                  else self.cell_sel.CM_PRESEL)
-            img_selected = self.cell_sel.get_cells_on_trains(
-                valid_train_ids, cm=cm)
-            data_dict["cm_presel"][0] = (cm == self.cell_sel.CM_PRESEL)
-
-            group = f[agipd_base]['image']
-            allcells = np.squeeze(group['cellId'])
-            allpulses = np.squeeze(group['pulseId'])
-
-            firange = self.gen_valid_range(first_index, last_index,
-                                           self.max_cells, allcells,
-                                           allpulses, valid_indices,
-                                           img_selected)
-
-            if firange is None:
-                # gen_valid_range() returns None if there are no cells
-                # to correct, exit early.
-                data_dict['nImg'][0] = 0
-                return 0
-
-            n_img = firange.shape[0]
-            data_dict['nImg'][0] = n_img
-            if np.all(np.diff(firange) == 1):
-                # if firange consists of contiguous indices
-                # convert firange from fancy indexing to slicing
-                firange = slice(firange[0], firange[-1]+1)
-                raw_data = group['data'][firange]
-            else:
-                # Avoid very slow performance using fancing indexing,
-                # if firange consists of non-contiguous indices.
-                raw_data = group['data'][:][firange]
-            data_dict['data'][:n_img] = raw_data[:, 0]
-            data_dict['rawgain'][:n_img] = raw_data[:, 1]
-            data_dict['cellId'][:n_img] = allcells[firange]
-            data_dict['pulseId'][:n_img] = allpulses[firange]
-            data_dict['trainId'][:n_img] = np.squeeze(group['trainId'][:][firange])  # noqa
-        except Exception as e:
-            print(f'Error during reading data from file {file_name}: {e}')
-            print(f'Error traceback: {traceback.format_exc()}')
-            n_img = 0
+
+        h5_dc = H5File(file_name)
+
+        # Exclude trains without data.
+        im_dc = h5_dc.select(agipd_base, "image.*", require_all=True)
+
+        valid_train_ids = self.get_valid_image_idx(
+            im_dc[agipd_base, "image.trainId"])
+
+        if len(valid_train_ids) == 0:
+            # If there's not a single valid train, exit early.
+            print(f"WARNING: No valid trains for {im_dc.files} to process.")
             data_dict['nImg'][0] = 0
+            return 0
+
+        # store valid trains in shared memory
+        # (get_valid_image_idx() already returned only the valid train IDs)
+        n_valid_trains = len(valid_train_ids)
+        data_dict["n_valid_trains"][0] = n_valid_trains
+        data_dict["valid_trains"][:n_valid_trains] = valid_train_ids
+
+        # get cell selection for the images in this file
+        cm = ( self.cell_sel.CM_NONE if apply_sel_pulses
+                else self.cell_sel.CM_PRESEL )
+
+        img_selected = self.cell_sel.get_cells_on_trains(
+            valid_train_ids, cm=cm)
+        data_dict["cm_presel"][0] = (cm == self.cell_sel.CM_PRESEL)
+
+        # Exclude non_valid trains from the selected data collection.
+        im_dc = im_dc.select_trains(by_id(valid_train_ids))
+
+        if "AGIPD500K" in agipd_base:
+            agipd_comp = components.AGIPD500K(im_dc)
+        else:
+            agipd_comp = components.AGIPD1M(im_dc)
 
+        kw = {
+            "unstack_pulses": False,
+            "pulses": np.nonzero(img_selected),
+        }
+
+        # [n_modules, n_imgs, 2, x, y]
+        raw_data = agipd_comp.get_array("image.data", **kw)[0]
+        n_img = raw_data.shape[0]
+
+        data_dict['nImg'][0] = n_img
+        data_dict['data'][:n_img] = raw_data[:, 0]
+        data_dict['rawgain'][:n_img] = raw_data[:, 1]
+        data_dict['cellId'][:n_img] = agipd_comp.get_array(
+            "image.cellId", **kw)[0]
+        data_dict['pulseId'][:n_img] = agipd_comp.get_array(
+            "image.pulseId", **kw)[0]
+        data_dict['trainId'][:n_img] = agipd_comp.get_array(
+            "image.trainId", **kw)[0]
         return n_img
 
     def write_file(self, i_proc, file_name, ofile_name):
@@ -437,7 +465,7 @@ class AgipdCorrections:
         """
 
         module_idx = int(file_name.split('/')[-1].split('-')[2][-2:])
-        agipd_base = self.h5_data_path.format(module_idx)
+        agipd_base = f'INSTRUMENT/{self.h5_data_path}/'.format(module_idx)
         idx_base = self.h5_index_path.format(module_idx)
         data_path = f'{agipd_base}/image'
         data_dict = self.shared_dict[i_proc]
@@ -821,78 +849,32 @@ class AgipdCorrections:
         # Copy the data across into the existing shared-memory array
         mask[...] = msk[...]
 
-    def get_valid_image_idx(
-        self, idx_base: str, infile: str, raw_format_version: int = 2
-    ):
-        """Return the indices of valid data"""
-        if raw_format_version == 2:
-            idxtrains = np.squeeze(infile['/INDEX/trainId'])
-
-            # Check against train ID filter list, if any
-            if self.train_ids is not None:
-                valid = np.in1d(idxtrains, self.train_ids)
+    def get_valid_image_idx(self, im_dc: DataCollection) -> list:  # noqa
+        """Return the train ids of `im_dc` that are valid to correct.
 
-                if not valid.any():
-                    # Shortcut to avoid any further loading.
-                    return valid, 0, 0, idxtrains, np.zeros(0, dtype=np.int32)
-            else:
-                valid = np.ones_like(idxtrains, dtype=bool)
-
-            # Load count and offsets and filter for non-emtpy trains.
-            count = np.squeeze(infile[idx_base + "image/count"])
-            first = np.squeeze(infile[idx_base + "image/first"])
-            valid &= count != 0
-
-            # Validate that train indices values fall
-            # between medianTrain +- 1e4
-            medianTrain = np.median(idxtrains)
-            lowok = (idxtrains > medianTrain - 1e4)
-            highok = (idxtrains < medianTrain + 1e4)
-            valid &= lowok & highok
-
-            if not valid.any():
-                # Shortcut if no valid trains are left.
-                return valid, 0, 0, idxtrains, np.zeros(0, dtype=np.int32)
-
-            # Last index = last valid train + max. number of memory cells
-            last_index = int(first[valid][-1] + count[valid][-1])
-            first_index = int(first[valid][0])
-            # do actual validity filtering:
-            validc, validf = count[valid], first[valid]
-
-            # Creating an array of validated indices.
-            # If all indices were validated this array will be the same,
-            # as what is stored at /DET/image/trainId
-            valid_indices = np.concatenate(
-                [
-                    np.arange(validf[i], validf[i] + validc[i])
-                    for i in range(validf.size)
-                ],
-                axis=0,
-            )
-            valid_indices = np.squeeze(valid_indices).astype(np.int32)
-
-        elif raw_format_version == 1:
-            status = np.squeeze(infile[idx_base + "image/status"])
-            if np.count_nonzero(status != 0) == 0:
-                raise IOError(f"File {infile} has no valid counts")
-            last = np.squeeze(infile[idx_base + "image/last"])
-            first = np.squeeze(infile[idx_base + "image/first"])
-            valid = status != 0
-            last_index = int(last[status != 0][-1]) + 1
-            first_index = int(first[status != 0][0])
-
-            idxtrains = np.squeeze(infile["/INDEX/trainId"])
-            medianTrain = np.nanmedian(idxtrains)
-            lowok = (idxtrains > medianTrain - 1e4)
-            highok = (idxtrains < medianTrain + 1e4)
-            valid &= lowok & highok
-            valid_indices = None
+        Drops ids outside the train filter or >= 1e4 from the median id.
+        """
+        dc_trains = np.asarray(im_dc.train_ids)
+        if len(dc_trains) == 0:
+            return dc_trains  # nothing to validate; empty array, not an int
+        # Check against train ID filter list, if any
+        if self.train_ids is not None:
+            valid = np.in1d(dc_trains, self.train_ids)
         else:
-            raise AttributeError(
-                f"Not a known raw format version: {raw_format_version}")
+            valid = np.ones_like(dc_trains, dtype=bool)
+
+        # Reject train ids from the past or future:
+        # validate that train ids fall strictly
+        # between medianTrain +- 1e4
+        medianTrain = np.nanmedian(dc_trains)
+        lowok = (dc_trains > medianTrain - 1e4)
+        highok = (dc_trains < medianTrain + 1e4)
+        valid &= lowok & highok
+
+        # keep only the valid trains (boolean mask, no sentinel value)
+        valid_trains = dc_trains[valid]
 
-        return (valid, first_index, last_index, idxtrains, valid_indices)
+        return valid_trains
 
     def apply_selected_pulses(self, i_proc: int) -> int:
         """Select sharedmem data indices to correct based on selected
@@ -939,53 +921,6 @@ class AgipdCorrections:
 
         return n_img
 
-    def gen_valid_range(self, first_index: int, last_index: int,
-                        max_cells: int, allcells: np.array,
-                        allpulses: np.array,
-                        valid_indices: Optional[np.array] = None,
-                        img_selected: Optional[np.array] = None,
-                        ) -> np.array:
-        """ Validate the arrays of image.cellId and image.pulseId
-        to check presence of data and to avoid empty trains.
-
-        selected pulses range given from the AGIPD correction notebook
-        is taken into account if apply_sel_pulses is True
-
-        :param first_index: first index of image data
-        :param last_index: last index of image data
-        :param max_cells: number of memory cells to correct
-        :param allcells: array of image.cellsIds of raw data
-        :param allpulses: array of image.pulseIds of raw data
-        :param valid_indices: validated indices of image.data
-        :param img_selected: mask of selected cells for given
-            range of trains
-        :return firange: An array of validated image.data
-                         indices to correct
-        """
-
-        if valid_indices is not None:
-            allcells = allcells[valid_indices]
-            allpulses = allpulses[valid_indices]
-        else:
-            allcells = allcells[first_index:last_index]
-            allpulses = allpulses[first_index:last_index]
-
-        can_calibrate = (allcells < max_cells)
-
-        if img_selected is not None:
-            can_calibrate &= img_selected
-
-        if not np.any(can_calibrate):
-            return
-
-        if valid_indices is None:
-            firange = np.arange(first_index, last_index)
-        else:
-            firange = valid_indices
-        firange = firange[can_calibrate]
-
-        return firange
-
     def copy_and_sanitize_non_cal_data(self, infile, outfile, agipd_base,
                                        idx_base, trains):
         """ Copy and sanitize data in `infile` that is not touched by
@@ -1551,7 +1486,6 @@ class CellRange(CellSelection):
         return np.tile(self._sel_for_cm(self.flag, self.flag_cm, cm),
                        len(train_sel))
 
-
 class LitFrameSelection(CellSelection):
     """Selection of detector memery cells indicated as lit frames
     by the AgipdLitFrameFinder