Plot one random image per train in the dynamic flat-field correction notebook

fa21baa5 · Egor Sobolev · 7435e283 · fa21baa5
Commit fa21baa5 authored 10 months ago by Egor Sobolev
--- a/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb
+++ b/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb
@@ -112,7 +112,7 @@
    "    instrument_source_name = detector.instrument_source(module)\n",
    "    corrected_source_name = detector.corrected_source(module)\n",
    "    print('-', da, db_module, module, instrument_source_name)\n",
-    "    \n",
+    "\n",
    "    modules[da] = dict(\n",
    "        db_module=db_module,\n",
    "        module=module,\n",
@@ -183,7 +183,7 @@
    "# Output Folder Creation:\n",
    "os.makedirs(out_folder, exist_ok=True)\n",
    "\n",
-    "report = []\n",
+    "report = {}\n",
    "for file_da, file_modules in aggregators.items():\n",
    "    dc = RunDirectory(f\"{in_folder}/r{run:04d}\", f\"RAW-R{run:04d}-{file_da}-S*.h5\")\n",
    "\n",
@@ -208,6 +208,7 @@
    "    ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)\n",
    "\n",
    "    # correct and write sequence files\n",
+    "    seq_report = {}\n",
    "    for seq_id, train_mask in sequence_trains(train_ids, 200):\n",
    "        step_timer.start()\n",
    "        print('* sequence', seq_id)\n",
@@ -225,7 +226,6 @@
    "        f.create_index(seq_train_ids, timestamps=seq_timestamps)\n",
    "\n",
    "        # create file structure\n",
-    "        seq_report = {}\n",
    "        file_datasets = {}\n",
    "        for da in process_modules:\n",
    "            instrument_source = modules[da][\"raw_source_name\"]\n",
@@ -278,11 +278,16 @@
    "                keydata, ds = file_datasets[da][key]\n",
    "                ds[:] = keydata.ndarray()\n",
    "\n",
-    "            seq_report[da] = (raw_images[0, 0], corrected_images[:20, 0])\n",
+    "            rep_rix = np.random.randint(0, raw_images.shape[1])\n",
+    "            rep_cix = np.random.randint(0, corrected_images.shape[1],\n",
+    "                                       size=min(20, corrected_images.shape[0]))\n",
+    "            da_report = seq_report.setdefault(da, [])\n",
+    "            da_report.append((raw_images[0, rep_rix],\n",
+    "                              corrected_images[range(len(rep_cix)), rep_cix]))\n",
    "            step_timer.done_step(\"Correct flat-field\")\n",
    "\n",
    "        f.close()\n",
-    "        report.append(seq_report)"
+    "    report.update(seq_report)"
   ]
  },
  {
@@ -293,15 +298,23 @@
   "source": [
    "step_timer.start()\n",
    "if report:\n",
-    "    for da, (raw_image, corrected_images) in report[0].items():\n",
+    "    for da, da_report in report.items():\n",
+    "        if len(da_report) > 0:\n",
+    "            raw_images, corrected_images = zip(*da_report)\n",
+    "            raw_images = np.stack(raw_images)\n",
+    "            corrected_images = np.concatenate(corrected_images, axis=0)\n",
+    "        else:\n",
+    "            raw_images, corrected_images = da_report\n",
+    "            raw_images = raw_images[None, ...]\n",
+    "\n",
    "        source = modules[da][\"raw_source_name\"]\n",
    "        display(Markdown(f\"## {source}\"))\n",
    "\n",
-    "        display(Markdown(\"### The first raw image\"))\n",
-    "        plot_camera_image(raw_images[0, 0])\n",
+    "        display(Markdown(\"### The random raw image from the first train\"))\n",
+    "        plot_camera_image(raw_images[0])\n",
    "        plt.show()\n",
    "\n",
-    "        display(Markdown(\"### The first corrected image\"))\n",
+    "        display(Markdown(\"### The random corrected image from the first train\"))\n",
    "        plot_camera_image(corrected_images[0])\n",
    "        plt.show()\n",
    "\n",
@@ -315,7 +328,7 @@
    "                    np.full(\n",
    "                        (min_images - corrected_images.shape[0], *corrected_images.shape[1:]),\n",
    "                        fill_value=np.nan)])\n",
-    "        display(Markdown(\"### The first corrected images in the trains (up to 20)\"))\n",
+    "        display(Markdown(\"### The random corrected images by one from a train (up to 20 first trains)\"))\n",
    "        plot_images(corrected_images, figsize=(13, 8))\n",
    "        plt.show()\n",
    "\n",

 %% Cell type:markdown id: tags:

 # Dynamic Flat-field Offline Correction

 Author: Egor Sobolev

 Offline dynamic flat-field correction

 %% Cell type:code id: tags:

 ``` python
 # Input parameters of the notebook. The trailing comment on each line
 # describes the parameter (presumably these values are overridden per run
 # by xfel-calibrate — confirm).
 in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw"  # input folder, required
 out_folder ="/gpfs/exfel/exp/SPB/202430/p900425/scratch/proc/r0003"  # output folder, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 run = 3  # which run to read data from, required

 # Data files parameters.
 karabo_da = ['-1']  # data aggregators
 karabo_id = "SPB_MIC_HPVX2"  # karabo prefix of Shimadzu HPV-X2 devices

 # Database access parameters.
 cal_db_interface = "tcp://max-exfl-cal001:8021"  # Unused, calibration DB interface to use
 cal_db_timeout = 30000  # Unused, calibration DB timeout
 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HH:MM:SS.00 e.g. 2019-07-04 11:02:41.00

 # Correction parameters
 n_components = 20  # number of principal components of flat-field to use in correction
 downsample_factors = [1, 1]  # list of downsample factors for each image dimension (y, x)

 num_proc = 32  # number of processes running correction in parallel
 ```

 %% Cell type:code id: tags:

 ``` python
 import os
 import h5py
 import warnings
 from logging import warning

 warnings.filterwarnings('ignore')

 import numpy as np
 import matplotlib.pyplot as plt
 from IPython.display import display, Markdown
 from datetime import datetime

 from extra_data import RunDirectory, by_id

 %matplotlib inline
 from cal_tools.step_timing import StepTimer
 from cal_tools.files import sequence_trains, DataFile
 from cal_tools.tools import calcat_creation_time

 from cal_tools.restful_config import calibration_client, extra_calibration_client
 from cal_tools.calcat_interface2 import CalibrationData
 from cal_tools.shimadzu import ShimadzuHPVX2

 from dynflatfield import (
    DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,
    FlatFieldCorrectionFileProcessor
 )
 from dynflatfield.draw import plot_images, plot_camera_image
 ```

 %% Cell type:code id: tags:

 ``` python
 # Resolve the timestamp that is passed to the calibration database queries
 # (PDU snapshot here, constant lookup later in the notebook).
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 print(f"Creation time is {creation_time}")

 extra_calibration_client()  # Configure CalibrationData API.

 # Ask the calibration database for the physical detector units (PDUs)
 # registered for the detector `karabo_id` at `creation_time`.
 cc = calibration_client()
 pdus = cc.get_all_phy_det_units_from_detector({
    "detector_identifier": karabo_id,
    "pdu_snapshot_at": creation_time,
 })
 if not pdus["success"]:
    raise ValueError("Failed to retrieve PDUs")

 # The detector description is read from the first returned PDU.
 detector_info = pdus['data'][0]['detector']
 detector = ShimadzuHPVX2(detector_info["source_name_pattern"])
 index_group = detector.image_index_group
 image_key = detector.image_key

 print(f"Instrument {detector.instrument}")
 print(f"Detector in use is {karabo_id}")

 # Collect the selected modules, keyed by their data aggregator name.
 modules = {}
 for pdu in pdus["data"]:
    db_module = pdu["physical_name"]
    module = pdu["module_number"]
    da = pdu["karabo_da"]
    # karabo_da == ['-1'] selects every aggregator; otherwise only the
    # aggregators listed in karabo_da are kept.
    if karabo_da[0] != "-1" and da not in karabo_da:
        continue

    instrument_source_name = detector.instrument_source(module)
    corrected_source_name = detector.corrected_source(module)
    print('-', da, db_module, module, instrument_source_name)

    # Raw and corrected source names are looked up from this dict by the
    # correction and plotting cells.
    modules[da] = dict(
        db_module=db_module,
        module=module,
        raw_source_name=instrument_source_name,
        corrected_source_name=corrected_source_name,
    )

 # Timer shared by all following cells to report per-step durations.
 step_timer = StepTimer()
 ```

 %% Cell type:markdown id: tags:

 # Calibration constants

 %% Cell type:code id: tags:

 ``` python
 # Load the "Offset" and "DynamicFF" constants of every selected module and
 # build one flat-field correction object per module.
 step_timer.start()

 dc = RunDirectory(f"{in_folder}/r{run:04d}")
 conditions = detector.conditions(dc)

 # Query the constants of the detector given by `karabo_id`, i.e. the same
 # detector whose PDUs were listed, instead of a hard-coded identifier.
 caldata = CalibrationData.from_condition(
    conditions, karabo_id, event_at=creation_time)

 aggregators = {}  # file aggregator name -> list of module aggregators
 corrections = {}  # module aggregator -> correction object
 for da in modules:
    # The part of the aggregator name before '/' is the one used in the raw
    # file name pattern; several modules may share it.
    file_da, _, _ = da.partition('/')
    aggregators.setdefault(file_da, []).append(da)
    try:
        dark = caldata["Offset", da].ndarray()
        flat = caldata["DynamicFF", da].ndarray()

        # Entry 0 of the constant is the flat field itself, the following
        # entries are its components; at most n_components of them are used.
        components = flat[1:][:n_components]
        flat = flat[0]

        dffc = DynamicFlatFieldCorrection.from_constants(
            dark, flat, components, downsample_factors)

        corrections[da] = dffc
    except (KeyError, FileNotFoundError):
        # missed constants are reported later
        pass

 step_timer.done_step("Load calibration constants")
 ```

 %% Cell type:markdown id: tags:

 # Correction

 %% Cell type:code id: tags:

 ``` python
 # Output Folder Creation:
 os.makedirs(out_folder, exist_ok=True)

-report = []
+report = {}
 for file_da, file_modules in aggregators.items():
    dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{file_da}-S*.h5")

    # build train IDs
    train_ids = set()
    process_modules = []
    for da in file_modules:
        instrument_source = modules[da]["raw_source_name"]
        if instrument_source not in dc.all_sources:
            print(f"Source {instrument_source} for module {da} is missed")
            continue
        if da not in corrections:
            warning(f"Constants are not found for module {da}. "
                    "The module will not calibrated")
            continue

        keydata = dc[instrument_source][image_key].drop_empty_trains()
        train_ids.update(keydata.train_ids)
        process_modules.append(da)

    train_ids = np.array(sorted(train_ids))
    ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)

    # correct and write sequence files
+    seq_report = {}
    for seq_id, train_mask in sequence_trains(train_ids, 200):
        step_timer.start()
        print('* sequence', seq_id)
        seq_train_ids = train_ids[train_mask]
        seq_timestamps = ts[train_mask]
        dc_seq = dc.select_trains(by_id[seq_train_ids])
        ntrains = len(seq_train_ids)

        # create output file
        channels = [f"{modules[da]['corrected_source_name']}/{index_group}"
                    for da in process_modules]

        f = DataFile.from_details(out_folder, file_da, run, seq_id)
        f.create_metadata(like=dc, instrument_channels=channels)
        f.create_index(seq_train_ids, timestamps=seq_timestamps)

        # create file structure
-        seq_report = {}
        file_datasets = {}
        for da in process_modules:
            instrument_source = modules[da]["raw_source_name"]
            keydata = dc_seq[instrument_source][image_key].drop_empty_trains()
            count = keydata.data_counts(labelled=False)
            i = np.flatnonzero(count)
            raw_images = keydata.select_trains(np.s_[i]).ndarray()

            # not pulse resolved
            shape = keydata.shape
            count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)

            corrected_source = modules[da]["corrected_source_name"]
            src = f.create_instrument_source(corrected_source)
            src.create_index(**{index_group: count})

            # create key for images
            ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)
            module_datasets = {image_key: ds_data}

            # create keys for image parameters
            for key in detector.copy_keys:
                keydata = dc_seq[instrument_source][key].drop_empty_trains()
                module_datasets[key] = (keydata, src.create_key(
                    key, shape=keydata.shape, dtype=keydata.dtype))

            file_datasets[da] = module_datasets

        step_timer.done_step("Create output file")

        # correct and write data to file
        for da in process_modules:
            step_timer.start()
            dc_seq = dc.select_trains(by_id[seq_train_ids])

            dffc = corrections[da]
            instrument_source = modules[da]["raw_source_name"]
            proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)

            proc.start_workers()
            proc.run(dc_seq)
            proc.join_workers()

            # not pulse resolved
            corrected_images = np.stack(proc.rdr.results, 0)
            file_datasets[da][image_key][:] = corrected_images

            # copy image parameters
            for key in detector.copy_keys:
                keydata, ds = file_datasets[da][key]
                ds[:] = keydata.ndarray()

-            seq_report[da] = (raw_images[0, 0], corrected_images[:20, 0])
+            rep_rix = np.random.randint(0, raw_images.shape[1])
+            rep_cix = np.random.randint(0, corrected_images.shape[1],
+                                       size=min(20, corrected_images.shape[0]))
+            da_report = seq_report.setdefault(da, [])
+            da_report.append((raw_images[0, rep_rix],
+                              corrected_images[range(len(rep_cix)), rep_cix]))
            step_timer.done_step("Correct flat-field")

        f.close()
-        report.append(seq_report)
+    report.update(seq_report)
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 if report:
-    for da, (raw_image, corrected_images) in report[0].items():
+    for da, da_report in report.items():
+        if len(da_report) > 0:
+            raw_images, corrected_images = zip(*da_report)
+            raw_images = np.stack(raw_images)
+            corrected_images = np.concatenate(corrected_images, axis=0)
+        else:
+            raw_images, corrected_images = da_report
+            raw_images = raw_images[None, ...]
+
        source = modules[da]["raw_source_name"]
        display(Markdown(f"## {source}"))

-        display(Markdown("### The first raw image"))
-        plot_camera_image(raw_images[0, 0])
+        display(Markdown("### The random raw image from the first train"))
+        plot_camera_image(raw_images[0])
        plt.show()

-        display(Markdown("### The first corrected image"))
+        display(Markdown("### The random corrected image from the first train"))
        plot_camera_image(corrected_images[0])
        plt.show()

        min_images = 5
        if corrected_images.shape[0] < min_images:
            # Update corrected_images to avoid less axes
            # array dimension than expected in plot_images.
            corrected_images = np.concatenate(
                [
                    corrected_images,
                    np.full(
                        (min_images - corrected_images.shape[0], *corrected_images.shape[1:]),
                        fill_value=np.nan)])
-        display(Markdown("### The first corrected images in the trains (up to 20)"))
+        display(Markdown("### The random corrected images by one from a train (up to 20 first trains)"))
        plot_images(corrected_images, figsize=(13, 8))
        plt.show()

 step_timer.done_step("Draw images")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Print the total time measured by the step timer, then the timer's own summary.
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```