split LUT step from data correction

45ae6894 · Karim Ahmed · 2bf65119 · 45ae6894 · 45ae6894
Commit 45ae6894 authored 2 years ago by Karim Ahmed
--- a/notebooks/Gotthard2/Correction_Gotthard2_NBC.ipynb
+++ b/notebooks/Gotthard2/Correction_Gotthard2_NBC.ipynb
@@ -39,6 +39,9 @@
    "cal_db_interface = \"tcp://max-exfl016:8017#8025\" # the database interface to use\n",
    "cal_db_timeout = 180000  # timeout on caldb requests\n",
    "\n",
+    "# Parameters affecting corrected data.\n",
+    "relative_gain = True  # do relative gain correction\n",
+    "\n",
    "# Parameters for plotting\n",
    "skip_plots = False  # exit after writing corrected files\n",
    "pulse_preview = 3 # pulseId to preview. The following even/odd pulseId is used for preview.\n",
@@ -201,8 +204,13 @@
    "def correct_train(wid, index, d):\n",
    "    g = gain[index]\n",
    "    d_corr = np.zeros_like(d, dtype=np.float32)\n",
+    "    gotthard2algs.convert_to_10bit(d, lut, d_corr)\n",
    "    gotthard2algs.correct_train(\n",
-    "        d, g, lut, offset_map.astype(np.float32), gain_map.astype(np.float32), d_corr)\n",
+    "        d_corr, g,\n",
+    "        offset_map.astype(np.float32),\n",
+    "        gain_map.astype(np.float32),\n",
+    "        apply_relgain=relative_gain,\n",
+    "    )\n",
    "    data_corr[index, ...] = d_corr"
   ]
  },
@@ -235,12 +243,13 @@
    "        # Allocate shared arrays.\n",
    "        data_corr = context.alloc(shape=dshape, dtype=np.float32)\n",
    "\n",
-    "        context.map(correct_train, data) \n",
+    "        context.map(correct_train, data)\n",
    "        step_timer.done_step(\"Correcting one sequence file\")\n",
    "        step_timer.start()\n",
    "\n",
    "        # Provided PSI gain map has 0 values. Set inf values to nan.\n",
    "        data_corr[np.isinf(data_corr)] = np.nan\n",
+    "\n",
    "        # Create CORR files and add corrected data sources.\n",
    "        # Exclude raw data images (data/adc)\n",
    "        with h5py.File(out_file, 'w') as ofile:\n",
@@ -255,7 +264,7 @@
    "                chunks=((1,) + dshape[1:]),  # 1 chunk == 1 image\n",
    "                dtype=np.float32,\n",
    "            )\n",
-    "            step_timer.done_step(\"Storing data\")"
+    "            step_timer.done_step(\"Storing data\")\n"
   ]
  },
  {

 %% Cell type:markdown id:bed7bd15-21d9-4735-82c1-c27c1a5e3346 tags:

 # Gotthard2 Offline Correction #

 Author: European XFEL Detector Group, Version: 1.0

 Offline Calibration for the Gothard2 Detector

 %% Cell type:code id:570322ed-f611-4fd1-b2ec-c12c13d55843 tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/DETLAB/202230/p900276/raw"  # the folder to read data from, required
 out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2"  # the folder to output to, required
 run = 10  # run to process, required  # TODO: test runs up to run 18. Last 3 runs can be checked.
 sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed
 sequences_per_node = 1  # number of sequence files per node if notebook executed through xfel-calibrate, set to 0 to not run SLURM parallel

 # Parameters used to access raw data.
 karabo_id = "DET_LAB_G2"  # karabo prefix of Jungfrau devices
 karabo_da = ["DA01"]  # data aggregators
 receiver = "GOT01"
 control_template = "CTRL{:02d}"
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/{}'  # template for control source name (filled with karabo_id_control)
 karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id

 # Parameters for calibration database.
 use_dir_creation_date = True  # use the creation data of the input dir for database queries
 cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
 cal_db_timeout = 180000  # timeout on caldb requests

+# Parameters affecting corrected data.
+relative_gain = True  # do relative gain correction
+
 # Parameters for plotting
 skip_plots = False  # exit after writing corrected files
 pulse_preview = 3 # pulseId to preview. The following even/odd pulseId is used for preview.

 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id:6e9730d8-3908-41d7-abe2-d78e046d5de2 tags:

 ``` python
 import warnings

 import h5py
 import pasha as psh
 import multiprocessing
 import numpy as np
 import matplotlib.pyplot as plt
 from IPython.display import Markdown, display
 from extra_data import RunDirectory, H5File
 from pathlib import Path

 from cal_tools import h5_copy_except
 from cal_tools import gotthard2algs
 from cal_tools.tools import (
    get_dir_creation_date,
 )
 from cal_tools.step_timing import StepTimer

 warnings.filterwarnings('ignore')

 prettyPlotting = True

 %matplotlib inline
 ```

 %% Cell type:code id:d7c02c48-4429-42ea-a42e-de45366d7fa3 tags:

 ``` python
 in_folder = Path(in_folder)
 run_folder = in_folder / f"r{run:04d}"
 out_folder = Path(out_folder)
 out_folder.mkdir(parents=True, exist_ok=True)

 run_dc = RunDirectory(in_folder / f'r{run:04d}')

 instrument_src = instrument_source_template.format(karabo_id, "{}")
 ctrl_src = ctrl_source_template.format(karabo_id_control, control_template)

 print(f"Run is: {run}")
 print(f"Process modules: {karabo_da}")

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time")

 if not karabo_id_control:
    karabo_id_control = karabo_id
 ```

 %% Cell type:code id:b5eb816e-b5f2-44ce-9907-0273d82341b6 tags:

 ``` python
 # Select only process detector.

 if sequences != [-1]:
    seq_files = []
    for f in run_dc.select(f"*{karabo_id}*").files:
        raw_f = Path(f.filename)
        for s in sequences:
            if raw_f.match(f"*-S{s:05d}.h5"):
                seq_files.append(raw_f)
 else:
    seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]

 seq_files = sorted(seq_files)

 if not len(seq_files):
    raise IndexError("No sequence files available for the selected sequences.")

 print(f"Processing a total of {len(seq_files)} sequence files")
 ```

 %% Cell type:code id:f9a8d1eb-ce6a-4ed0-abf4-4a6029734672 tags:

 ``` python
 step_timer = StepTimer()
 ```

 %% Cell type:markdown id:8c852392-bb19-4c40-b2ce-3b787538a92d tags:

 ### Retrieving calibration constants

 %% Cell type:code id:3ba5d81b-e0d6-447b-a7e1-7f40e77b5a61 tags:

 ``` python
 # TODO: Retrieve calibration constants from CALCAT
 ```

 %% Cell type:code id:531c12e6-1c02-45fc-a7a2-018a08eb7a2d tags:

 ``` python
 # load constants temporarely using defined local paths.

 constants_file = "/gpfs/exfel/data/user/mramilli/gotthard2/constants/GH2-0124/calibration_constants_GH2-0124.h5"

 with h5py.File(constants_file, 'r') as cfile:
    offset_map = cfile["offset_map"][()]
    gain_map = cfile["gain_map"][()]
    lut = cfile["LUT"][()]
 ```

 %% Cell type:code id:23fcf7f4-351a-4df7-8829-d8497d94fecc tags:

 ``` python
 context = psh.context.ProcessContext(num_workers=multiprocessing.cpu_count())
 ```

 %% Cell type:code id:daecd662-26d2-4cb8-aa70-383a579cf9f9 tags:

 ``` python
 def correct_train(wid, index, d):
    g = gain[index]
    d_corr = np.zeros_like(d, dtype=np.float32)
+    gotthard2algs.convert_to_10bit(d, lut, d_corr)
    gotthard2algs.correct_train(
-        d, g, lut, offset_map.astype(np.float32), gain_map.astype(np.float32), d_corr)
+        d_corr, g,
+        offset_map.astype(np.float32),
+        gain_map.astype(np.float32),
+        apply_relgain=relative_gain,
+    )
    data_corr[index, ...] = d_corr
 ```

 %% Cell type:code id:f88c1aa6-a735-4b72-adce-b30162f5daea tags:

 ``` python
 for k_da in karabo_da:
    instr_mod_src = instrument_src.format(receiver)
    data_path = "INSTRUMENT/"+instr_mod_src+"/data"

    for raw_file in seq_files:
        step_timer.start()

        dc = H5File(raw_file)
        out_file = out_folder / raw_file.name.replace("RAW", "CORR")

        # Select module INSTRUMENT source and deselect empty trains.
        dc = dc.select(instr_mod_src, require_all=True)
        data = dc[instr_mod_src, "data.adc"].ndarray()
        gain = dc[instr_mod_src, "data.gain"].ndarray()

        step_timer.done_step("preparing raw data")
        dshape = data.shape
        step_timer.start()

        # Allocate shared arrays.
        data_corr = context.alloc(shape=dshape, dtype=np.float32)

        context.map(correct_train, data)
        step_timer.done_step("Correcting one sequence file")
        step_timer.start()

        # Provided PSI gain map has 0 values. Set inf values to nan.
        data_corr[np.isinf(data_corr)] = np.nan
+
        # Create CORR files and add corrected data sources.
        # Exclude raw data images (data/adc)
        with h5py.File(out_file, 'w') as ofile:
            # Copy RAW non-calibrated sources.
            with h5py.File(raw_file, 'r') as sfile:
                h5_copy_except.h5_copy_except_paths(
                    sfile, ofile, [f"{data_path}/adc"])
            # Create datasets with the available corrected data
            ddset = ofile.create_dataset(
                f"{data_path}/adc",
                data=data_corr,
                chunks=((1,) + dshape[1:]),  # 1 chunk == 1 image
                dtype=np.float32,
            )
            step_timer.done_step("Storing data")
 ```

 %% Cell type:code id:94b8e4d2-9f8c-4c23-a509-39238dd8435c tags:

 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id:0ccc7f7e-2a3f-4ac0-b854-7d505410d2fd tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id:ff203f77-3811-46f3-bf7d-226d2dcab13f tags:

 ``` python
 mod_dcs = {}
 for mod in karabo_da:
    mod_dcs[mod] = {}
    with RunDirectory(out_folder) as out_dc:
        tid, mod_dcs[mod]["train_corr_data"] = next(
            out_dc[instr_mod_src, "data.adc"].trains())
    with RunDirectory(run_folder) as in_dc:
        train_dict = in_dc.train_from_id(tid)[1][instr_mod_src]
        mod_dcs[mod]["train_raw_data"] = train_dict["data.adc"]
        mod_dcs[mod]["train_raw_gain"] = train_dict["data.gain"]
 ```

 %% Cell type:code id:1b379438-eb1d-42b2-ac83-eb8cf88c46db tags:

 ``` python
 display(Markdown("### Mean RAW across pulses for one train:"))
 display(Markdown(f"Train: {tid}"))

 step_timer.start()
 for mod in karabo_da:
    raw_data = mod_dcs[mod]["train_raw_data"]
    fig, ax = plt.subplots(figsize=(20, 10))
    im = ax.plot(np.mean(raw_data, axis=0))
    ax.set_title(f'Module {mod}')
    ax.set_xlabel("Strip #", size=15)
    ax.set_ylabel("12-bit ADC output", size=15)
 step_timer.done_step("Plotting raw data")
 ```

 %% Cell type:code id:512b48a6-9e2d-4fcc-b075-3698297177bf tags:

 ``` python
 display(Markdown("### Mean CORRECTED across pulses for one train:"))
 display(Markdown(f"Train: {tid}"))

 step_timer.start()
 for mod in karabo_da:
    corr_data = mod_dcs[mod]["train_corr_data"]
    fig, ax = plt.subplots(figsize=(20, 10))
    im = ax.plot(np.mean(corr_data, axis=0))
    ax.set_title(f'Module {mod}')
    ax.set_xlabel("Strip #", size=15)
    ax.set_ylabel("10-bit ADC output", size=15)
 step_timer.done_step("Plotting corrected data")
 ```

 %% Cell type:code id:cd8f5e08-fcee-4bff-ba63-6452b3d892a2 tags:

 ``` python
 # Validate given "pulse_preview"

 if pulse_preview + 1 > data.shape[1]:
    print(f"WARNING: selected pulse_preview {pulse_preview} is not available in data."
    " Previewing 1st pulse.")
    pulse_preview = 1

 if data.shape[1] == 1:
    odd_pulse = 1
    even_pulse = None
 else:
    odd_pulse = pulse_preview if pulse_preview % 2 else pulse_preview + 1
    even_pulse = pulse_preview if not (pulse_preview % 2) else pulse_preview + 1

 if pulse_preview + 1 > data.shape[1]:
    pulse_preview = 1
    if data.shape[1] > 1:
        pulse_preview = 2
 ```

 %% Cell type:code id:e5f0d4d8-e32c-4f2c-8469-4ebbfd3f644c tags:

 ``` python
 display(Markdown("### RAW even/odd pulses for one train:"))
 display(Markdown(f"RAW train: {tid}"))
 for mod in karabo_da:
    raw_data = mod_dcs[mod]["train_raw_data"]
    fig, ax = plt.subplots(figsize=(20, 10))
    ax.plot(raw_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
    if even_pulse:
        ax.plot(raw_data[even_pulse], label=f"Even Pulse {even_pulse}")
    ax.set_title(f'Module {mod}')
    ax.set_xlabel("Strip #", size=15)
    ax.set_ylabel("12-bit ADC output", size=15)
    ax.legend()
 step_timer.done_step("Plotting RAW odd/even pulses.")
 ```

 %% Cell type:markdown id:f1ddac79-5590-4bdb-9cfa-147d73f5dd5f tags:

 ### Single RAW Train Preview

 A single image from the first RAW train

 %% Cell type:code id:96900ba8-96a3-44b0-b489-d948f19298dc tags:

 ``` python
 display(Markdown("### CORRECTED even/odd pulses for one train:"))
 display(Markdown(f"CORRECTED train: {tid}"))
 for mod in karabo_da:
    corr_data = mod_dcs[mod]["train_corr_data"]
    fig, ax = plt.subplots(figsize=(20, 10))
    ax.plot(corr_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
    if even_pulse:
        ax.plot(corr_data[even_pulse], label=f"Even Pulse {even_pulse}")
    ax.set_title(f'Module {mod}')
    ax.set_xlabel("Strip #", size=15)
    ax.set_ylabel("12-bit ADC output", size=15)
    ax.legend()
 step_timer.done_step("Plotting CORRECTED odd/even pulses.")
 ```

--- a/src/cal_tools/gotthard2algs.pyx
+++ b/src/cal_tools/gotthard2algs.pyx
 import numpy as np

 from cython cimport boundscheck, wraparound, cdivision
-from cython.parallel import prange
 cimport numpy as cnp


+@boundscheck(False)
+@wraparound(False)
+def convert_to_10bit(
+    unsigned short[:, :] data,
+    unsigned short[:, :, :] lut,
+    float[:, :] data_10bit,
+):
+    """Convert 12bit RAW data to 10bit data."""
+
+    cdef:
+        unsigned short d_10bit, pulse, train, x, raw_val
+
+    for pulse in range(data.shape[0]):
+        cell = pulse % 2
+        for x in range(data.shape[1]):
+            raw_val = data[pulse, x]
+            d_10bit = lut[cell, raw_val, x]
+            data_10bit[pulse, x] = <float>d_10bit
+
+
 @boundscheck(False)
 @wraparound(False)
 @cdivision(True)
 def correct_train(
-    unsigned short[:, :] data,
+    float[:, :] data,
    unsigned char[:, :] gain,
-    unsigned short[:, :, :] lut,
-    float[:, :, :] offset,
-    float[:, :, :] relgain,
-    float[:, :] corr_data,
+    float[:, :, :] offset_map,
+    float[:, :, :] relgain_map,
+    unsigned short short apply_relgain = 1,
 ):
-    """
-    correct Gotthard2 raw data.
-    1. Convert 12 bit data to 10 bit data.
-    2. Offset correction.
-    3. Gain correction.
+    """Correct Gotthard2 raw data.
+    1. Offset correction.
+    2. Gain correction.
    """

-    cdef :
-        cdef float corr_val
-        cdef unsigned short d_10bit, pulse, train, x, raw_val, g
-    
+    cdef:
+        float raw_val
+        unsigned short d_10bit, pulse, train, x, g
+
    for pulse in range(data.shape[0]):
        cell = pulse % 2
        for x in range(data.shape[1]):
@@ -34,8 +50,7 @@ def correct_train(
            if g == 3:
                g = 2
            raw_val = data[pulse, x]
-            d_10bit = lut[cell, raw_val, x]
-            corr_val = <float>d_10bit
-            corr_val -= offset[g, cell, x]
-            corr_val /= relgain[g, cell, x]
-            corr_data[pulse, x] = corr_val
+            raw_val -= offset_map[g, cell, x]
+            if apply_relgain == 1:
+                raw_val /= relgain_map[g, cell, x]
+            data[pulse, x] = raw_val