Commit c3234477 authored by Karim Ahmed

Merge branch 'feat/read_with_EXtra_data' into 'master'

[EPIX100] Reading ePix100 data with EXtra-data, Correct and Dark notebooks.

See merge request detectors/pycalibration!500
parents b468a401 183003a7
%% Cell type:markdown id: tags:
# ePix100 Dark Characterization
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides dark image analysis and calibration constants of the ePix100 detector.
Dark characterization evaluates the offset and noise of the detector and gives information about bad pixels. The resulting maps are saved as .h5 files for later use and injected into the calibration database.
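In outline, the offset map is the per-pixel mean of the dark frames and the noise map their per-pixel standard deviation. A minimal sketch of this computation with EXtra-data (the run path below is a hypothetical example; source and key names are those used in this notebook):

``` python
import numpy as np
from extra_data import RunDirectory

# Hypothetical run directory; source/key as used by this notebook.
run_dir = RunDirectory("/gpfs/exfel/exp/HED/202030/p900136/raw/r0182")
pixel_data = ("HED_IA1_EPX100-2/DET/RECEIVER:daqOutput", "data.image.pixels")

# Stack all dark frames and reduce over the train axis.
frames = run_dir.select(*pixel_data, require_all=True)[pixel_data].ndarray()
offset_map = np.mean(frames, axis=0)  # per-pixel pedestal (ADU)
noise_map = np.std(frames, axis=0)    # per-pixel noise (ADU)
```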
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # ipcluster profile to use
in_folder = '/gpfs/exfel/exp/HED/202030/p900136/raw' # input folder, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/HED_dark/' # output folder, required
out_folder = '' # output folder, required
sequence = 0 # sequence file to use
run = 182 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-2" # karabo karabo_id
karabo_da = ["EPIX02"] # data aggregators
receiver_id = "RECEIVER" # inset for receiver devices
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data/image/pixels' # path in the HDF5 file to images
h5path_t = '/INSTRUMENT/{}/DET/{}:daqOutput/data/backTemp' # path to find temperature at
h5path_cntrl = '/CONTROL/{}/DET' # path to control data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters for the calibration database.
use_dir_creation_date = True
cal_db_interface = "tcp://max-exfl016:8020" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # Output constants to the calibration database
local_output = True # output constants locally
number_dark_frames = 0 # number of images to be used, if set to 0 all available images are used
temp_limits = 5 # limit for parameter Operational temperature
db_module = 'ePix100_M17' # detector karabo_id
# Conditions used for injected calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
fix_temperature = 290. # fix temperature to this value
temp_limits = 5 # limit for parameter Operational temperature
# Parameters used during selecting raw data trains.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
max_trains = 1000 # Maximum number of trains to use for processing dark constants. Set to 0 to use all available trains.
# Don't delete! myMDC sends this by default.
operation_mode = '' # Detector operation mode, optional
# TODO: delete after removing from calibration_configurations
db_module = '' # ID of module in calibration database, this parameter is ignore in the notebook. TODO: remove from calibration_configurations.
```
%% Cell type:code id: tags:
``` python
import os
import warnings

warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
from IPython.display import Latex, Markdown, display

%matplotlib inline

import numpy as np
import pasha as psh
from extra_data import RunDirectory

import XFELDetAna.xfelprofiler as xprof
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna.plotting.util import prettyPlotting
from cal_tools.tools import (
    get_dir_creation_date,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    save_const_to_h5,
    send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
warnings.filterwarnings('ignore')

prettyPlotting = True

profiler = xprof.Profiler()
profiler.disable()

instrument_src = instrument_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection.
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = f'proposal:{proposal} runs:{run}'

report = get_report(out_folder)

pixels_x = 708  # rows of the ePix100
pixels_y = 768  # columns of the ePix100

ped_dir = os.path.join(in_folder, f"r{run:04d}")
run_dir = RunDirectory(ped_dir)

print(f"Run number: {run}")
print(f"Instrument H5File source: {instrument_src}")
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time.isoformat()} as creation time")
os.makedirs(out_folder, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
pixel_data = (instrument_src, "data.image.pixels")

# Specifies the total number of images to process.
n_trains = run_dir.get_data_counts(*pixel_data).shape[0]

# Modify n_trains to process based on the given maximum
# and minimum number of trains.
if max_trains:
    n_trains = min(max_trains, n_trains)

if n_trains < min_trains:
    raise ValueError(
        f"Less than {min_trains} trains are available in RAW data."
        " Not enough data to process darks.")

print(f"Number of dark images to analyze: {n_trains}.")

integration_time = int(run_dir.get_array(
    f"{karabo_id}/DET/CONTROL",
    "expTime.value")[0])
temperature = np.mean(run_dir.get_array(
    instrument_src,
    "data.backTemp").values) / 100.

if fix_temperature != 0:
    temperature_k = fix_temperature
    print("Temperature is fixed!")
else:
    temperature_k = temperature + 273.15

print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time}")
print(f"Mean temperature was {temperature:0.2f} °C / {temperature_k:0.2f} K")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
data_dc = run_dir.select(
    *pixel_data, require_all=True).select_trains(np.s_[:n_trains])

print(f"Reading data from: {data_dc.files}\n")

data = data_dc[pixel_data].ndarray()
```
%% Cell type:code id: tags:
``` python
noise_data = np.std(data, axis=0)
offset_data = np.mean(data, axis=0)
```
%% Cell type:code id: tags:
``` python
constant_maps = {}
constant_maps['Offset'] = offset_data[..., np.newaxis]
constant_maps['Noise'] = noise_data[..., np.newaxis]
print("Initial constant maps are created")
```
%% Cell type:code id: tags:
``` python
#**************OFFSET MAP HISTOGRAM***********#
ho, co = np.histogram(constant_maps['Offset'].flatten(), bins=700)
do = {'x': co[:-1],
      'y': ho,
      'y_err': np.sqrt(ho[:]),
      'drawstyle': 'bars',
      'color': 'cornflowerblue',
      }
fig = xana.simplePlot(do, figsize='1col', aspect=2,
                      x_label='Offset (ADU)',
                      y_label="Counts", y_log=True,
                      )

#*****NOISE MAP HISTOGRAM FROM THE OFFSET CORRECTED DATA*******#
hn, cn = np.histogram(constant_maps['Noise'].flatten(), bins=200)
dn = {'x': cn[:-1],
      'y': hn,
      'y_err': np.sqrt(hn[:]),
      'drawstyle': 'bars',
      'color': 'cornflowerblue',
      }
fig = xana.simplePlot(dn, figsize='1col', aspect=2,
                      x_label='Noise (ADU)',
                      y_label="Counts",
                      y_log=True)

#**************HEAT MAPS*******************#
fig = xana.heatmapPlot(constant_maps['Offset'][:, :, 0],
                       x_label='Columns', y_label='Rows',
                       lut_label='Offset (ADU)',
                       x_range=(0, pixels_y),
                       y_range=(0, pixels_x), vmin=1000, vmax=4000)

fig = xana.heatmapPlot(constant_maps['Noise'][:, :, 0],
                       x_label='Columns', y_label='Rows',
                       lut_label='Noise (ADU)',
                       x_range=(0, pixels_y),
                       y_range=(0, pixels_x), vmax=2 * np.mean(constant_maps['Noise']))
```
%% Cell type:code id: tags:
``` python
# Save constants to DB
dclass="ePix100"
md = None
# If PDU(db_module) is not given with input parameters
# retrieve the connected physical detector unit
for const_name in constant_maps.keys():
det = getattr(Constants, dclass)
const = getattr(det, const_name)()
const.data = constant_maps[const_name].data
# set the operating condition
condition = getattr(Conditions.Dark, dclass)(bias_voltage=bias_voltage,
integration_time=integration_time,
temperature=temperature_k,
in_vacuum=in_vacuum)
for parm in condition.parameters:
if parm.name == "Sensor Temperature":
parm.lower_deviation = temp_limits
parm.upper_deviation = temp_limits
# This should be used in case of running notebook
# by a different method other than myMDC which already
# sends CalCat info.
# TODO: Set db_module to "" by default in the first cell
if not db_module:
db_module = get_pdu_from_db(karabo_id, karabo_da, const,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
db_module = get_pdu_from_db(karabo_id, karabo_da, const,
condition, cal_db_interface,
snapshot_at=creation_time)[0]
if db_output:
md = send_to_db(db_module, karabo_id, const, condition,
file_loc=file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, const, condition,
const.data, file_loc, report,
creation_time, out_folder)
print(f"Calibration constant {const_name} is stored locally at {out_folder} \n")
print("Constants parameter conditions are:\n")
print(f"• Bias voltage: {bias_voltage}\n• Integration time: {integration_time}\n"
f"• Temperature: {temperature_k}\n• In Vacuum: {in_vacuum}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction of images acquired with the ePix100 detector.
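In outline, each frame passes through a fixed chain: offset subtraction, optional common-mode suppression, relative gain, pattern classification of split events, and finally absolute gain scaling to keV or photons. A minimal, illustrative sketch of that order (the function and maps below are hypothetical; the notebook builds the real operators from calibration constants):

``` python
import numpy as np

def correct_frame(frame, offset_map, rel_gain=None, abs_gain=None, photon_energy=0.):
    d = frame.astype(np.float32) - offset_map  # offset (pedestal) subtraction
    # (optional common-mode correction would go here: block, row, column)
    if rel_gain is not None:
        d /= rel_gain            # flatten pixel-to-pixel response, still in ADU
    if abs_gain is not None:
        d *= abs_gain            # scale ADU -> keV
        if photon_energy > 0:
            d /= photon_energy   # keV -> number of photons
    return d
```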
%% Cell type:code id: tags:
``` python
cluster_profile = "noDB" # ipcluster profile to use
in_folder = "/gpfs/exfel/exp/MID/202121/p002929/raw" # input folder, required
in_folder = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw" # input folder, required
out_folder = "" # output folder, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
run = 126 # which run to read data from, required
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 9988 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "MID_EXP_EPIX-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "ePix100_M15" # module id in the database
receiver_id = "RECEIVER" # inset for receiver devices
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data/image' # path in the HDF5 file to images
h5path_t = '/INSTRUMENT/{}/DET/{}:daqOutput/data/backTemp' # path to find temperature at
h5path_cntrl = '/CONTROL/{}/DET' # path to control data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
overwrite = True # overwrite output folder
# Only for testing
limit_images = 0 # ONLY FOR TESTING. process only first N images, 0 - process all.
# Parameters for the calibration database.
use_dir_creation_date = True # date constants injected before directory creation time
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
cpuCores = 4 # Specifies the number of running cpu cores
chunk_size_idim = 1 # H5 chunking size of output data
overwrite = True # overwrite output folder
limit_images = 0 # process only first N images, 0 - process all
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
fix_temperature = 290. # fix temperature to this value
temp_deviations = 5. # temperature deviation for the constant operating conditions
gain_photon_energy = 9.0 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import traceback
import warnings
import h5py
import pasha as psh
import multiprocessing
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, Markdown, display
from extra_data import RunDirectory, H5File
from pathlib import Path
import XFELDetAna.xfelprofiler as xprof
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from XFELDetAna.plotting.util import prettyPlotting
from XFELDetAna.util import env
from cal_tools import h5_copy_except
from cal_tools.tools import (
    get_constant_from_db,
    get_dir_creation_date,
)
from cal_tools.step_timing import StepTimer
from iCalibrationDB import (
    Conditions,
    Constants,
)
warnings.filterwarnings('ignore')
prettyPlotting = True
profiler = xprof.Profiler()
profiler.disable()
env.iprofile = cluster_profile
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
if absolute_gain:
    relative_gain = True
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100

in_folder = Path(in_folder)
ped_dir = in_folder / f"r{run:04d}"
fp_name = path_template.format(run, karabo_da)
run_dc = RunDirectory(ped_dir)

print(f"Reading from: {ped_dir / fp_name}")
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Data corrected files are stored at: {out_folder}")

creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
if creation_time:
    print(f"Using {creation_time.isoformat()} as creation time")
```
%% Cell type:code id: tags:
``` python
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
# If a set of sequences is requested, adapt the seq_files list.
if sequences != [-1]:
    seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]

if not len(seq_files):
    raise IndexError("No sequence files available for the selected sequences.")

print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
sensorSize = [x, y]
chunkSize = 100  # number of images to read per chunk
# Sensor area will be analysed according to blocksize.
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1  # ePix100 has no memory cells
run_parallel = False

# Read control data.
integration_time = int(run_dc.get_run_value(
    f"{karabo_id}/DET/CONTROL",
    "expTime.value"))
temperature = np.mean(run_dc.get_array(
    f"{karabo_id}/DET/{receiver_template}:daqOutput",
    "data.backTemp").values) / 100.

if fix_temperature != 0:
    temperature_k = fix_temperature
    print("Temperature is fixed!")
else:
    temperature_k = temperature + 273.15

print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time}")
print(
    f"Mean temperature was {temperature:0.2f} °C "
    f"/ {temperature_k:0.2f} K at beginning of the run."
)
print(f"Operated in vacuum: {in_vacuum}")

out_folder = Path(out_folder)
if out_folder.is_dir() and not overwrite:
    raise AttributeError(
        "Output path exists! Exiting as overwriting corrected data is prohibited.")
out_folder.mkdir(parents=True, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
step_timer.done_step('Reading control parameters.')
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
    md = display(Latex(tabulate.tabulate(
        table,
        tablefmt='latex',
        headers=["#", "file"]
    )))
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:
``` python
cond_dict = {
    "bias_voltage": bias_voltage,
    "integration_time": integration_time,
    "temperature": temperature_k,
    "in_vacuum": in_vacuum,
}

dark_condition = Conditions.Dark.ePix100(**cond_dict)

# Update conditions with the illuminated condition.
cond_dict.update({
    "photon_energy": gain_photon_energy
})

illum_condition = Conditions.Illuminated.ePix100(**cond_dict)

const_cond = {
    "Offset": dark_condition,
    "Noise": dark_condition,
    "RelativeGain": illum_condition,
}

const_data = dict()

for cname, condition in const_cond.items():
    if cname == "RelativeGain" and not relative_gain:
        continue
    # TODO: Fix this logic.
    for parm in condition.parameters:
        if parm.name == "Sensor Temperature":
            parm.lower_deviation = temp_deviations
            parm.upper_deviation = temp_deviations

    const_data[cname] = get_constant_from_db(
        karabo_id=karabo_id,
        karabo_da=karabo_da,
        constant=getattr(Constants.ePix100, cname)(),
        condition=condition,
        empty_constant=None,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time,
        print_once=2,
        timeout=cal_db_timeout
    )

if relative_gain and const_data["RelativeGain"] is None:
    print(
        "WARNING: RelativeGain map is requested, but not found.\n"
        "No gain correction will be applied."
    )
    relative_gain = False
    absolute_gain = False

plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
# Initializing some parameters.
hscale = 1
stats = True
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]
commonModeBlockSize = [x//2, y//2]
```
%% Cell type:code id: tags:
``` python
histCalOffsetCor = xcal.HistogramCalculator(
    sensorSize,
    bins=nbins,
    range=hrange,
    parallel=run_parallel,
    nCells=memoryCells,
    cores=cpuCores,
    blockSize=blockSize
)

# *****************Histogram Calculators****************** #
histCalCor = xcal.HistogramCalculator(
    sensorSize,
    bins=1050,
    range=[-50, 1000],
    parallel=run_parallel,
    nCells=memoryCells,
    cores=cpuCores,
    blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if common_mode:
    histCalCMCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        cores=cpuCores,
        blockSize=blockSize,
    )
    cmCorrectionB = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='block',
        nCells=memoryCells,
        noiseMap=const_data['Noise'],
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionR = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='row',
        nCells=memoryCells,
        noiseMap=const_data['Noise'],
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionC = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='col',
        nCells=memoryCells,
        noiseMap=const_data['Noise'],
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
```
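%% Cell type:markdown id: tags:
The common-mode operators above subtract a per-block, per-row, or per-column baseline estimated from pixels compatible with noise (within `cm_noise_sigma` times the noise map), and skip the correction when fewer than `cm_min_frac` of the pixels survive that mask. An illustrative sketch of the row variant (a hypothetical helper, not the xcal implementation):
%% Cell type:code id: tags:
``` python
import numpy as np

def row_common_mode(d, noise_map, sigma=5.0):
    # Exclude pixels with signal well above noise from the baseline estimate.
    masked = np.where(np.abs(d) < sigma * noise_map, d, np.nan)
    # Per-row common mode, subtracted from every pixel in the row.
    cm = np.nanmedian(masked, axis=1, keepdims=True)
    return d - cm
```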
%% Cell type:code id: tags:
``` python
if relative_gain:
    gain_cnst = np.median(const_data["RelativeGain"])
    hscale = gain_cnst
    plot_unit = 'keV'
    if photon_energy > 0:
        plot_unit = '$\gamma$'
        hscale /= photon_energy

    gainCorrection = xcal.RelativeGainCorrection(
        sensorSize,
        gain_cnst/const_data["RelativeGain"][..., None],
        nCells=memoryCells,
        parallel=run_parallel,
        cores=cpuCores,
        blockSize=blockSize,
        gains=None,
    )

    histCalRelGainCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        cores=cpuCores,
        blockSize=blockSize
    )

    if absolute_gain:
        histCalAbsGainCor = xcal.HistogramCalculator(
            sensorSize,
            bins=nbins,
            range=hrange*hscale,
            parallel=run_parallel,
            nCells=memoryCells,
            cores=cpuCores,
            blockSize=blockSize
        )
```
%% Cell type:code id: tags:
``` python
if pattern_classification:
    patternClassifier = xcal.PatternClassifier(
        [x, y],
        const_data["Noise"],
        split_evt_primary_threshold,
        split_evt_secondary_threshold,
        split_evt_mip_threshold,
        tagFirstSingles=0,
        nCells=memoryCells,
        cores=cpuCores,
        allowElongated=False,
        blockSize=[x, y],
        parallel=run_parallel,
    )
    histCalSECor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        cores=cpuCores,
        blockSize=blockSize,
    )
    histCalGainCorSingles = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        cores=cpuCores,
        blockSize=blockSize
    )
```
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
    d = d[pixel_data[0]][pixel_data[1]][..., np.newaxis].astype(np.float32)
    d = np.compress(
        np.any(d > 0, axis=(0, 1)), d, axis=2)

    # Offset correction.
    d -= const_data["Offset"]
    histCalOffsetCor.fill(d)

    # Common mode correction.
    if common_mode:
        # Block CM
        d = cmCorrectionB.correct(d)
        # Row CM
        d = cmCorrectionR.correct(d)
        # Column CM
        d = cmCorrectionC.correct(d)
        histCalCMCor.fill(d)

    # Relative gain correction.
    if relative_gain:
        d = gainCorrection.correct(d)
        histCalRelGainCor.fill(d)

    data[index, ...] = np.squeeze(d)

    """The gain correction is currently applied as an absolute correction
    (not a relative correction, as implied by the name);
    it changes the scale (the unit of measurement)
    of the data from ADU to either keV or number of photons.
    But the pattern classification relies on comparing
    data with the noise map, which is still in ADU.

    The best solution is to do a relative gain
    correction first and apply the global absolute
    gain to the data at the end, after clustering.
    """
    if pattern_classification:
        d_clu, patterns = patternClassifier.classify(d)
        d_clu[d_clu < (split_evt_primary_threshold*const_data["Noise"])] = 0

        data_patterns[index, ...] = np.squeeze(patterns)
        data_clu[index, ...] = np.squeeze(d_clu)

        d_clu[patterns != 100] = np.nan

        histCalSECor.fill(d_clu)

    # Absolute gain correction:
    # changes data from ADU to keV (or number of photons).
    if absolute_gain:
        d = d * gain_cnst
        if photon_energy > 0:
            d /= photon_energy
        histCalAbsGainCor.fill(d)

        if pattern_classification:
            # Modify pattern classification.
            d_clu = d_clu * gain_cnst
            if photon_energy > 0:
                d_clu /= photon_energy

            histCalGainCorSingles.fill(d_clu)

            data_clu[index, ...] = np.squeeze(d_clu)

    histCalCor.fill(d)
```
%% Cell type:code id: tags:
``` python
pixel_data = (instrument_src, "data.image.pixels")

# 10 is a number chosen after testing 1 ... 71 parallel threads
context = psh.context.ThreadContext(num_workers=10)
```
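%% Cell type:markdown id: tags:
For reference, pasha's `alloc`/`map` pattern used below: each worker writes into a disjoint slice of a shared output array, keyed by the item index. A minimal sketch on hypothetical toy data:
%% Cell type:code id: tags:
``` python
import numpy as np
import pasha as psh

ctx = psh.context.ThreadContext(num_workers=4)
squares = ctx.alloc(shape=(10,), dtype=np.float64)

def kernel(wid, index, value):
    # wid: worker id, index: position in the input, value: the item itself.
    squares[index] = value ** 2

ctx.map(kernel, np.arange(10.0))
```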
%% Cell type:code id: tags:
``` python
for f in seq_files:
    data = None
    seq_dc = H5File(f)
    n_imgs = seq_dc.get_data_counts(*pixel_data).shape[0]

    # Data shape in seq_dc excluding trains with empty images.
    dshape = seq_dc[pixel_data].shape
    dataset_chunk = ((chunk_size_idim,) + dshape[1:])  # e.g. (1, pixels_x, pixels_y)

    if n_imgs - dshape[0] != 0:
        print(f"- WARNING: {f} has {n_imgs - dshape[0]} trains with empty data.")

    # This parameter is only used for testing.
    if limit_images > 0:
        n_imgs = min(n_imgs, limit_images)

    data = context.alloc(shape=dshape, dtype=np.float32)

    if pattern_classification:
        data_clu = context.alloc(shape=dshape, dtype=np.float32)
        data_patterns = context.alloc(shape=dshape, dtype=np.int32)

    step_timer.start()

    context.map(
        correct_train, seq_dc.select(
            *pixel_data, require_all=True).select_trains(np.s_[:n_imgs])
    )
    step_timer.done_step(f'Correcting {n_imgs} trains.')

    # Store detector h5 information in the corrected file
    # and deselect data to correct and store later.
    step_timer.start()
    out_file = out_folder / f.name.replace("RAW", "CORR")
    data_path = "INSTRUMENT/" + instrument_src + "/data/image"
    pixels_path = f"{data_path}/pixels"

    # First copy all raw data sources to the corrected file,
    # while excluding the raw data image /data/image/pixels.
    with h5py.File(out_file, 'w') as ofile:
        # Copy RAW non-calibrated sources.
        with h5py.File(f, 'r') as sfile:
            h5_copy_except.h5_copy_except_paths(
                sfile, ofile,
                [pixels_path])

        # Create datasets in the CORR h5 file and add the corrected images.
        dataset = ofile.create_dataset(
            pixels_path,
            data=data,
            chunks=dataset_chunk,
            dtype=np.float32)

        if pattern_classification:
            # Save /data/image/pixels_classified in the corrected file.
            datasetc = ofile.create_dataset(
                f"{data_path}/pixels_classified",
                data=data_clu,
                chunks=dataset_chunk,
                dtype=np.float32)

            # Save /data/image/patterns in the corrected file.
            datasetp = ofile.create_dataset(
                f"{data_path}/patterns",
                data=data_patterns,
                chunks=dataset_chunk,
                dtype=np.int32)
    step_timer.done_step('Storing data.')
```
%% Cell type:code id: tags:
``` python
ho, eo, co, so = histCalCor.get()

d = [{
    'x': co,
    'y': ho,
    'y_err': np.sqrt(ho[:]),
    'drawstyle': 'steps-mid',
    'errorstyle': 'bars',
    'errorcoarsing': 2,
    'label': 'Total corr.'
}]

ho, eo, co, so = histCalOffsetCor.get()

d.append({
    'x': co,
    'y': ho,
    'y_err': np.sqrt(ho[:]),
    'drawstyle': 'steps-mid',
    'errorstyle': 'bars',
    'errorcoarsing': 2,
    'label': 'Offset corr.'
})

if common_mode:
    ho, eo, co, so = histCalCMCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'CM corr.'
    })

if relative_gain:
    ho, eo, co, so = histCalRelGainCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'Relative gain corr.'
    })

if pattern_classification:
    ho, eo, co, so = histCalSECor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'Isolated photons (singles)'
    })

fig = xana.simplePlot(
    d, aspect=1, x_label='Energy (ADU)',
    y_label='Number of occurrences', figsize='2col',
    y_log=True, x_range=(-50, 500),
    legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:
``` python
if absolute_gain:
    d = []
    ho, eo, co, so = histCalAbsGainCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'Absolute gain corr.'
    })

    if pattern_classification:
        ho, eo, co, so = histCalGainCorSingles.get()
        d.append({
            'x': co,
            'y': ho,
            'y_err': np.sqrt(ho[:]),
            'drawstyle': 'steps-mid',
            'errorstyle': 'bars',
            'errorcoarsing': 2,
            'label': 'Isolated photons (singles)'
        })

    fig = xana.simplePlot(
        d, aspect=1, x_label=f'Energy ({plot_unit})',
        y_label='Number of occurrences', figsize='2col',
        y_log=True,
        x_range=np.array((-50, 500))*hscale,
        legend='top-center-frame-2col',
    )
    plt.grid()
    plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
    np.nanmedian(data, axis=0),
    x_label='Columns', y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50, vmax=50)
step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
    data[0, ...],
    x_label='Columns', y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50, vmax=50)
step_timer.done_step(f'Plotting single shot of corrected data.')
```
@@ -33,4 +33,4 @@ class StepTimer:
        """Show mean & std for each step"""
        for step, data in self.steps.items():
            data = np.asarray(data)
            print(f'{step}: {data.mean():.01f} +- {data.std():.02f} s')
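For context, the notebooks above drive this timer with the pattern below (a usage sketch; only the summary method body is shown in this hunk):

``` python
step_timer = StepTimer()
step_timer.start()                               # begin timing a step
# ... do work ...
step_timer.done_step('Correcting 1000 trains.')  # record elapsed time under this label
```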