Commit cb46bba3 authored by Karim Ahmed, parent b0da6ce9
Merge request !783 [JUNGFRAU][CORRECT] Fix/store roi only if defined

remove unrelated changes
%% Cell type:markdown id: tags:

# ePix100 Data Correction

Author: European XFEL Detector Group, Version: 2.0

The following notebook provides data correction of images acquired with the ePix100 detector.

The corrections are applied in the following sequence: Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.

Offset, common mode and gain corrected data is saved to /data/image/pixels in the CORR files.

If pattern classification is applied (charge sharing correction), the corrected data is saved to /data/image/pixels_classified, while the corresponding patterns are saved to /data/image/patterns in the CORR files.
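%% Cell type:markdown id: tags:

For orientation, the resulting CORR files can be inspected with h5py. The file name below is a hypothetical example built from this notebook's default parameters (run 156, aggregator EPIX01); adjust it to your own run:

``` python
import h5py

# Hypothetical output file; CORR files mirror the RAW sequence file names.
corr_file = "CORR-R0156-EPIX01-S00000.h5"
source = "HED_IA1_EPX100-1/DET/RECEIVER:daqOutput"

with h5py.File(corr_file, "r") as f:
    image = f[f"INSTRUMENT/{source}/data/image"]
    pixels = image["pixels"][:]  # offset/CM/gain corrected images
    # Present only if pattern classification was enabled:
    if "pixels_classified" in image:
        clusters = image["pixels_classified"][:]
        patterns = image["patterns"][:]

print(pixels.shape, pixels.dtype)
```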
%% Cell type:code id: tags:

``` python
in_folder = "/gpfs/exfel/exp/HED/202202/p003121/raw"  # input folder, required
out_folder = ""  # output folder, required
metadata_folder = ""  # directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1]  # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1  # number of sequence files per cluster node if run as a Slurm job, set to 0 to not run SLURM parallel
run = 156  # which run to read data from, required

# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1"  # karabo ID
karabo_da = "EPIX01"  # data aggregators
db_module = ""  # module ID in the database
receiver_template = "RECEIVER"  # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput'  # instrument detector data source in h5files

# Parameters affecting writing corrected data.
chunk_size_idim = 1  # H5 chunking size of output data
limit_images = 0  # ONLY FOR TESTING. Process only the first N images; 0 processes all.

# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015#8025"  # calibration DB interface to use
cal_db_timeout = 300000  # timeout on calibration DB requests
creation_time = ""  # the timestamp to use with the calibration DB. Required format: "YYYY-MM-DD hh:mm:ss", e.g. 2019-07-04 11:02:41

# Conditions for retrieving calibration constants.
bias_voltage = 200  # bias voltage
in_vacuum = False  # detector operated in vacuum
integration_time = -1  # detector integration time; default value -1 uses the value from the slow data
fix_temperature = -1  # fixed temperature value in Kelvin; default value -1 uses the value from the files
gain_photon_energy = 8.048  # photon energy used for gain calibration
photon_energy = 0.  # photon energy to calibrate in number of photons, 0 for calibration in keV

# Flags to select the type of applied corrections.
pattern_classification = True  # do clustering
relative_gain = True  # apply relative gain correction
absolute_gain = True  # apply absolute gain correction (implies relative gain)
common_mode = True  # apply common mode correction

# Parameters affecting the applied correction.
cm_min_frac = 0.25  # no CM correction is performed if, after masking, the fraction of good pixels falls below this
cm_noise_sigma = 5.  # CM correction noise standard deviation
split_evt_primary_threshold = 7.  # primary threshold for split event correction
split_evt_secondary_threshold = 5.  # secondary threshold for split event correction
split_evt_mip_threshold = 1000.  # minimum ionizing particle threshold


def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:

``` python
import tabulate
import warnings

import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, display
from extra_data import RunDirectory, H5File
from pathlib import Path

from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import h5_copy_except
from cal_tools.epix100 import epix100lib
from cal_tools.tools import (
    calcat_creation_time,
    get_dir_creation_date,
    get_constant_from_db,
    load_specified_constants,
    CalibrationMetadata,
)
from cal_tools.step_timing import StepTimer
from iCalibrationDB import (
    Conditions,
    Constants,
)

warnings.filterwarnings('ignore')

prettyPlotting = True

%matplotlib inline
```
%% Cell type:code id: tags:

``` python
x = 708  # rows of the ePix100
y = 768  # columns of the ePix100

if absolute_gain:
    relative_gain = True

plot_unit = 'ADU'
```
%% Cell type:code id: tags:

``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)

run_folder = in_folder / f"r{run:04d}"

instrument_src = instrument_source_template.format(
    karabo_id, receiver_template)

print(f"Correcting run: {run_folder}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Corrected data files are stored at: {out_folder}")
```
%% Cell type:code id: tags:

``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")

metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml.
# NOTE: this notebook shouldn't overwrite the calibration metadata file.
const_yaml = metadata.get("retrieved-constants", {})
```
%% Cell type:code id: tags:

``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]

# If a specific set of sequences is requested, adapt the seq_files list.
if sequences != [-1]:
    seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]

if not len(seq_files):
    raise IndexError("No sequence files available for the selected sequences.")

print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:

``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:

``` python
step_timer.start()

sensorSize = [x, y]
# The sensor area will be analysed according to blockSize.
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1  # ePix100 has no memory cells
run_parallel = False

# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
    run_dc=run_dc,
    instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
    ctrl_src=f"{karabo_id}/DET/CONTROL",
    )

if integration_time < 0:
    integration_time = ctrl_data.get_integration_time()
    integration_time_str_add = ""
else:
    integration_time_str_add = "(manual input)"

if fix_temperature < 0:
    temperature = ctrl_data.get_temprature()
    temperature_k = temperature + 273.15
    temp_str_add = ""
else:
    temperature_k = fix_temperature
    temperature = fix_temperature - 273.15
    temp_str_add = "(manual input)"

print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:

``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]

if len(table):
    md = display(Latex(tabulate.tabulate(
        table,
        tablefmt='latex',
        headers=["#", "file"]
    )))
```
%% Cell type:markdown id: tags:

## Retrieving calibration constants

As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:

``` python
cond_dict = {
    "bias_voltage": bias_voltage,
    "integration_time": integration_time,
    "temperature": temperature_k,
    "in_vacuum": in_vacuum,
}

dark_condition = Conditions.Dark.ePix100(**cond_dict)

# Update conditions with the illuminated conditions.
cond_dict.update({
    "photon_energy": gain_photon_energy
})

illum_condition = Conditions.Illuminated.ePix100(**cond_dict)

const_cond = {
    "Offset": dark_condition,
    "Noise": dark_condition,
    "RelativeGain": illum_condition,
}
```
%% Cell type:code id: tags:

``` python
empty_constant = np.zeros((708, 768, 1), dtype=np.float32)

if const_yaml:  # Used while reproducing corrected data.
    print(f"Using stored constants in {metadata.filename}")
    const_data, _ = load_specified_constants(const_yaml[karabo_da]["constants"])
    for cname, cval in const_data.items():
        if cval is None and cname != "RelativeGain":
            const_data[cname] = empty_constant
else:  # First correction attempt.
    const_data = dict()
    for cname, condition in const_cond.items():
        # Avoid retrieving RelativeGain if it is not needed for the correction.
        if cname == "RelativeGain" and not relative_gain:
            const_data[cname] = None
        else:
            const_data[cname] = get_constant_from_db(
                karabo_id=karabo_id,
                karabo_da=karabo_da,
                constant=getattr(Constants.ePix100, cname)(),
                condition=condition,
                empty_constant=None if cname == "RelativeGain" else empty_constant,
                cal_db_interface=cal_db_interface,
                creation_time=creation_time,
                print_once=2,
                timeout=cal_db_timeout
            )
```
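%% Cell type:markdown id: tags:

As an optional sanity check, the retrieved constants can be listed before correcting; each map should have shape (708, 768, 1), matching `empty_constant`, while `RelativeGain` may be `None` if gain correction is disabled:

``` python
for cname, cval in const_data.items():
    print(cname, "not retrieved" if cval is None else cval.shape)
```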
%% Cell type:code id: tags:

``` python
if relative_gain and const_data.get("RelativeGain", None) is None:
    print(
        "WARNING: RelativeGain map is requested, but not found.\n"
        "No gain correction will be applied."
    )
    relative_gain = False
    absolute_gain = False

# Initializing some parameters.
hscale = 1
stats = True
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]
commonModeBlockSize = [x//2, y//2]
```
%% Cell type:code id: tags:

``` python
histCalOffsetCor = xcal.HistogramCalculator(
    sensorSize,
    bins=nbins,
    range=hrange,
    parallel=run_parallel,
    nCells=memoryCells,
    blockSize=blockSize
)

# *****************Histogram Calculators****************** #
histCalCor = xcal.HistogramCalculator(
    sensorSize,
    bins=1050,
    range=[-50, 1000],
    parallel=run_parallel,
    nCells=memoryCells,
    blockSize=blockSize
)
```
%% Cell type:code id: tags:

``` python
if common_mode:
    histCalCMCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )

    cmCorrectionB = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='block',
        nCells=memoryCells,
        noiseMap=const_data['Noise'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )

    cmCorrectionR = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='row',
        nCells=memoryCells,
        noiseMap=const_data['Noise'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )

    cmCorrectionC = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='col',
        nCells=memoryCells,
        noiseMap=const_data['Noise'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
```
%% Cell type:code id: tags:

``` python
if relative_gain:
    gain_cnst = np.median(const_data["RelativeGain"])
    hscale = gain_cnst
    plot_unit = 'keV'
    if photon_energy > 0:
        plot_unit = '$\gamma$'
        hscale /= photon_energy

    gainCorrection = xcal.RelativeGainCorrection(
        sensorSize,
        gain_cnst/const_data["RelativeGain"][..., None],
        nCells=memoryCells,
        parallel=run_parallel,
        blockSize=blockSize,
        gains=None,
    )

    histCalRelGainCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )

    if absolute_gain:
        histCalAbsGainCor = xcal.HistogramCalculator(
            sensorSize,
            bins=nbins,
            range=hrange*hscale,
            parallel=run_parallel,
            nCells=memoryCells,
            blockSize=blockSize
        )
```
%% Cell type:code id: tags:

``` python
if pattern_classification:
    patternClassifier = xcal.PatternClassifier(
        [x, y],
        const_data["Noise"],
        split_evt_primary_threshold,
        split_evt_secondary_threshold,
        split_evt_mip_threshold,
        tagFirstSingles=0,
        nCells=memoryCells,
        allowElongated=False,
        blockSize=[x, y],
        parallel=run_parallel,
    )

    histCalCSCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )

    histCalGainCorClusters = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )

    histCalGainCorSingles = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )
```
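%% Cell type:markdown id: tags:

Alongside the corrected clusters, the classifier returns an integer pattern map labelling each pixel's event type; in this notebook, pattern value 100 marks isolated single-photon hits (see `correct_train` below). A small sketch with dummy data of how the stored pattern map can be used:

``` python
import numpy as np

# Dummy stand-in for the /data/image/patterns dataset (5 trains).
patterns = np.random.choice([0, 100, 200], size=(5, 708, 768))

# Count isolated single-photon hits per train.
singles_per_train = (patterns == 100).sum(axis=(1, 2))
print(singles_per_train)
```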
%% Cell type:markdown id: tags:

## Applying corrections
``` python ``` python
def correct_train(wid, index, tid, d): def correct_train(wid, index, tid, d):
d = d[..., np.newaxis].astype(np.float32) d = d[pixel_data[0]][pixel_data[1]][..., np.newaxis].astype(np.float32)
d = np.compress( d = np.compress(
np.any(d > 0, axis=(0, 1)), d, axis=2) np.any(d > 0, axis=(0, 1)), d, axis=2)
# Offset correction. # Offset correction.
d -= const_data["Offset"] d -= const_data["Offset"]
histCalOffsetCor.fill(d) histCalOffsetCor.fill(d)
# Common Mode correction. # Common Mode correction.
if common_mode: if common_mode:
# Block CM # Block CM
d = cmCorrectionB.correct(d) d = cmCorrectionB.correct(d)
# Row CM # Row CM
d = cmCorrectionR.correct(d) d = cmCorrectionR.correct(d)
# COL CM # COL CM
d = cmCorrectionC.correct(d) d = cmCorrectionC.correct(d)
histCalCMCor.fill(d) histCalCMCor.fill(d)
# relative gain correction. # relative gain correction.
if relative_gain: if relative_gain:
d = gainCorrection.correct(d) d = gainCorrection.correct(d)
histCalRelGainCor.fill(d) histCalRelGainCor.fill(d)
"""The gain correction is currently applying """The gain correction is currently applying
an absolute correction (not a relative correction an absolute correction (not a relative correction
as the implied by the name); as the implied by the name);
it changes the scale (the unit of measurement) it changes the scale (the unit of measurement)
of the data from ADU to either keV or n_of_photons. of the data from ADU to either keV or n_of_photons.
But the pattern classification relies on comparing But the pattern classification relies on comparing
data with the noise map, which is still in ADU. data with the noise map, which is still in ADU.
The best solution is to do a relative gain The best solution is to do a relative gain
correction first and apply the global absolute correction first and apply the global absolute
gain to the data at the end, after clustering. gain to the data at the end, after clustering.
""" """
if pattern_classification: if pattern_classification:
d_clu, patterns = patternClassifier.classify(d) d_clu, patterns = patternClassifier.classify(d)
d_clu[d_clu < (split_evt_primary_threshold*const_data["Noise"])] = 0 d_clu[d_clu < (split_evt_primary_threshold*const_data["Noise"])] = 0
data_clu[index, ...] = np.squeeze(d_clu) data_clu[index, ...] = np.squeeze(d_clu)
data_patterns[index, ...] = np.squeeze(patterns) data_patterns[index, ...] = np.squeeze(patterns)
histCalCSCor.fill(d_clu) histCalCSCor.fill(d_clu)
# absolute gain correction # absolute gain correction
# changes data from ADU to keV (or n. of photons) # changes data from ADU to keV (or n. of photons)
if absolute_gain: if absolute_gain:
d = d * gain_cnst d = d * gain_cnst
if photon_energy > 0: if photon_energy > 0:
d /= photon_energy d /= photon_energy
histCalAbsGainCor.fill(d) histCalAbsGainCor.fill(d)
if pattern_classification: if pattern_classification:
# Modify pattern classification. # Modify pattern classification.
d_clu = d_clu * gain_cnst d_clu = d_clu * gain_cnst
if photon_energy > 0: if photon_energy > 0:
d_clu /= photon_energy d_clu /= photon_energy
data_clu[index, ...] = np.squeeze(d_clu) data_clu[index, ...] = np.squeeze(d_clu)
histCalGainCorClusters.fill(d_clu) histCalGainCorClusters.fill(d_clu)
d_sing = d_clu[patterns==100] # pattern 100 corresponds to single photons events d_sing = d_clu[patterns==100] # pattern 100 corresponds to single photons events
if len(d_sing): if len(d_sing):
histCalGainCorSingles.fill(d_sing) histCalGainCorSingles.fill(d_sing)
data[index, ...] = np.squeeze(d) data[index, ...] = np.squeeze(d)
histCalCor.fill(d) histCalCor.fill(d)
``` ```
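%% Cell type:markdown id: tags:

To see why clustering must happen before the absolute gain step, consider a minimal numeric sketch (made-up values, independent of the notebook's variables). The split-event thresholds compare pixel values against the noise map, which is in ADU, so the comparison breaks once the data has been scaled to keV:

``` python
import numpy as np

noise_adu = 5.0    # hypothetical noise map entry, in ADU
gain_cnst = 0.01   # hypothetical median relative gain (ADU -> keV scale)
pixel_adu = 40.0   # pixel after offset/CM/relative-gain correction, in ADU
primary_threshold = 7.0

# Correct: threshold while both sides are in ADU.
is_signal = pixel_adu > primary_threshold * noise_adu   # 40 > 35 -> True

# Wrong: applying the absolute gain first mixes keV with ADU.
pixel_kev = pixel_adu * gain_cnst                       # 0.4 keV
broken = pixel_kev > primary_threshold * noise_adu      # 0.4 > 35 -> False

print(is_signal, broken)
```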
%% Cell type:code id: tags:

``` python
pixel_data = (instrument_src, "data.image.pixels")

# 10 is a number chosen after testing 1 ... 71 parallel threads
context = psh.context.ThreadContext(num_workers=10)
```
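%% Cell type:markdown id: tags:

For readers unfamiliar with pasha: `context.alloc` creates an output array shared between workers, and `context.map` calls the kernel once per element of the first axis (or once per train when mapping over extra_data objects, as done below). A minimal sketch with a plain NumPy array, assuming pasha's array-mapping kernel signature `(worker_id, index, value)`:

``` python
import numpy as np
import pasha as psh

ctx = psh.context.ThreadContext(num_workers=4)
frames = np.random.rand(10, 8, 8)  # stand-in for a stack of images
means = ctx.alloc(shape=(10,), dtype=np.float64)

def kernel(worker_id, index, frame):
    # Each call writes to its own slot, so no locking is needed.
    means[index] = frame.mean()

ctx.map(kernel, frames)
print(means[:3])
```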
%% Cell type:code id: tags:

``` python
for f in seq_files:
    seq_dc = H5File(f)

    n_imgs = seq_dc.get_data_counts(*pixel_data).shape[0]

    # Data shape in seq_dc excluding trains with empty images.
    dshape = seq_dc[pixel_data].shape
    dataset_chunk = ((chunk_size_idim,) + dshape[1:])  # e.g. (1, pixels_x, pixels_y)

    if n_imgs - dshape[0] != 0:
        print(f"- WARNING: {f} has {n_imgs - dshape[0]} trains with empty data.")

    # This parameter is only used for testing.
    if limit_images > 0:
        n_imgs = min(n_imgs, limit_images)

    data = context.alloc(shape=dshape, dtype=np.float32)

    if pattern_classification:
        data_clu = context.alloc(shape=dshape, dtype=np.float32)
        data_patterns = context.alloc(shape=dshape, dtype=np.int32)

    # Correct data.
    step_timer.start()
    context.map(
        correct_train, seq_dc.select(
            *pixel_data, require_all=True).select_trains(np.s_[:n_imgs])
    )
    step_timer.done_step(f'Correcting {n_imgs} trains.')

    # Store the detector h5 information in the corrected file,
    # excluding the image data that is written separately below.
    step_timer.start()

    out_file = out_folder / f.name.replace("RAW", "CORR")
    data_path = "INSTRUMENT/" + instrument_src + "/data/image"
    pixels_path = f"{data_path}/pixels"

    # First copy all raw data sources to the corrected file,
    # while excluding the raw image data /data/image/pixels.
    with h5py.File(out_file, 'w') as ofile:
        # Copy RAW non-calibrated sources.
        with h5py.File(f, 'r') as sfile:
            h5_copy_except.h5_copy_except_paths(
                sfile, ofile,
                [pixels_path])

        # Create dataset in the CORR h5 file and add corrected images.
        dataset = ofile.create_dataset(
            pixels_path,
            data=data,
            chunks=dataset_chunk,
            dtype=np.float32)

        if pattern_classification:
            # Save /data/image/pixels_classified in the corrected file.
            datasetc = ofile.create_dataset(
                f"{data_path}/pixels_classified",
                data=data_clu,
                chunks=dataset_chunk,
                dtype=np.float32)

            # Save /data/image/patterns in the corrected file.
            datasetp = ofile.create_dataset(
                f"{data_path}/patterns",
                data=data_patterns,
                chunks=dataset_chunk,
                dtype=np.int32)

    step_timer.done_step('Storing data.')
```
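%% Cell type:markdown id: tags:

`h5_copy_except_paths` comes from cal_tools: it walks the RAW file and copies everything except the listed paths, so the CORR file keeps all raw metadata while the raw pixel images are replaced by corrected ones. A rough, hypothetical re-implementation of that idea with plain h5py (a sketch, not the cal_tools code; group attributes are not copied here):

``` python
import h5py

def copy_except_paths(src, dst, skip):
    """Copy every group/dataset from src to dst, except the given
    paths and anything below them."""
    def visit(name, obj):
        if any(name == p or name.startswith(p + "/") for p in skip):
            return  # excluded path, e.g. .../data/image/pixels
        if isinstance(obj, h5py.Group):
            dst.require_group(name)  # parents are visited before children
        else:
            parent, _, leaf = name.rpartition("/")
            target = dst.require_group(parent) if parent else dst
            src.copy(name, target, name=leaf)
    src.visititems(visit)
```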
%% Cell type:code id: tags:

``` python
ho, eo, co, so = histCalCor.get()

d = [{
    'x': co,
    'y': ho,
    'y_err': np.sqrt(ho[:]),
    'drawstyle': 'steps-mid',
    'errorstyle': 'bars',
    'errorcoarsing': 2,
    'label': 'Total corr.'
}]

ho, eo, co, so = histCalOffsetCor.get()
d.append({
    'x': co,
    'y': ho,
    'y_err': np.sqrt(ho[:]),
    'drawstyle': 'steps-mid',
    'errorstyle': 'bars',
    'errorcoarsing': 2,
    'label': 'Offset corr.'
})

if common_mode:
    ho, eo, co, so = histCalCMCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'CM corr.'
    })

if relative_gain:
    ho, eo, co, so = histCalRelGainCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'Relative gain corr.'
    })

if pattern_classification:
    ho, eo, co, so = histCalCSCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'Charge sharing corr.'
    })

fig = xana.simplePlot(
    d, aspect=1, x_label='Energy (ADU)',
    y_label='Number of occurrences', figsize='2col',
    y_log=True, x_range=(-50, 500),
    legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:

``` python
if absolute_gain:
    d = []
    ho, eo, co, so = histCalAbsGainCor.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': 'Absolute gain corr.'
    })

    if pattern_classification:
        ho, eo, co, so = histCalGainCorClusters.get()
        d.append({
            'x': co,
            'y': ho,
            'y_err': np.sqrt(ho[:]),
            'drawstyle': 'steps-mid',
            'errorstyle': 'bars',
            'errorcoarsing': 2,
            'label': 'Charge sharing corr.'
        })

        ho, eo, co, so = histCalGainCorSingles.get()
        d.append({
            'x': co,
            'y': ho,
            'y_err': np.sqrt(ho[:]),
            'drawstyle': 'steps-mid',
            'errorstyle': 'bars',
            'errorcoarsing': 2,
            'label': 'Isolated photons (singles)'
        })

    fig = xana.simplePlot(
        d, aspect=1, x_label=f'Energy ({plot_unit})',
        y_label='Number of occurrences', figsize='2col',
        y_log=True,
        x_range=np.array((-50, 500))*hscale,
        legend='top-center-frame-2col',
    )
    plt.grid()
    plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:

## Mean Image of the corrected data

%% Cell type:code id: tags:

``` python
step_timer.start()
fig = xana.heatmapPlot(
    np.nanmedian(data, axis=0),
    x_label='Columns', y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50, vmax=50)
step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
```
%% Cell type:markdown id: tags:

## Single Shot of the corrected data

%% Cell type:code id: tags:

``` python
step_timer.start()
fig = xana.heatmapPlot(
    data[0, ...],
    x_label='Columns', y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50, vmax=50)
step_timer.done_step('Plotting single shot of corrected data.')
```