Change name of dynamic flat-field correction package

ffcdffb9 · Egor Sobolev · 7daba04f · ffcdffb9 · ffcdffb9
Commit ffcdffb9 authored 1 year ago by Egor Sobolev
--- a/notebooks/DynamicFF/Characterize_DynamicFF_NBC.ipynb
+++ b/notebooks/DynamicFF/Characterize_DynamicFF_NBC.ipynb
@@ -78,8 +78,8 @@
    "    run_prop_seq_from_path,\n",
    ")\n",
    "\n",
-    "import dffc\n",
+    "import dynflatfield as dffc\n",
-    "from dffc.draw import plot_images, plot_camera_image"
+    "from dynflatfield.draw import plot_images, plot_camera_image"
   ]
  },
  {

 %% Cell type:markdown id: tags:
 # Characterization of dark and flat field for Dynamic Flat Field correction
 Author: Egor Sobolev
 Computation of dark offsets and flat-field principal components
 %% Cell type:code id: tags:
 ``` python
 in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw"  # input folder, required
 out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu'  # output folder, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 dark_run = 1 # which run to read data from, required
 flat_run = 2 # which run to read
 # Data files parameters.
 karabo_da = ['HPVX01/1', 'HPVX01/2'] # data aggregators
 karabo_id = "SPB_EHD_MIC" # karabo prefix of Shimadzu HPV-X2 devices
 #receiver_id = "PNCCD_FMT-0" # inset for receiver devices
 #path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # the template to use to access data
 instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput'  # data source path in h5file.
 #instrument_source_template = 'SPB_EHD_HPVX2_{module}/CAM/CAMERA:daqOutput'
 image_key = "data.image.pixels"  # image data key in Karabo or exdf notation
 db_module_template = "Shimadzu_HPVX2_{}"
 # Database access parameters.
 use_dir_creation_date = True  # use dir creation date as data production reference date
 cal_db_interface = "tcp://max-exfl-cal001:8021"  # calibration DB interface to use
 cal_db_timeout = 300000 # timeout on caldb requests
 db_output = False # if True, the notebook sends dark constants to the calibration database
 local_output = True # if True, the notebook saves dark constants locally
 creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. 2019-07-04 11:02:41.00
 n_components = 50  # Number of principal components to compute
 ```
 %% Cell type:code id: tags:
 ``` python
 import datetime
 import os
 import warnings
 warnings.filterwarnings('ignore')
 import time
 import numpy as np
 import matplotlib.pyplot as plt
 from IPython.display import display, Markdown
 from extra_data import RunDirectory
 %matplotlib inline
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    get_dir_creation_date,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    save_const_to_h5,
    save_dict_to_hdf5,
    send_to_db,
    run_prop_seq_from_path,
 )
-import dffc
+import dynflatfield as dffc
-from dffc.draw import plot_images, plot_camera_image
+from dynflatfield.draw import plot_images, plot_camera_image
 ```
 %% Cell type:code id: tags:
 ``` python
 creation_time=None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, max(dark_run, flat_run))
 print(f"Using {creation_time} as creation time of constant.")
 run, prop, seq = run_prop_seq_from_path(in_folder)
 file_loc = f'proposal: {prop}, runs: {dark_run} {flat_run}'
 # Read report path and create file location tuple to add with the injection
 file_loc = f"proposal:{prop} runs:{dark_run} {flat_run}"
 report = get_report(metadata_folder)
 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')
 print()
 instrument, part, component = karabo_id.split('_')
 sources = {}
 source_to_db = {}
 print("Sources:")
 for da in karabo_da:
    aggr, _, module = da.partition('/')
    source_name = instrument_source_template.format(
        instrument=instrument, part=part, component=component,
        module=module
    )
    sources[source_name] = aggr
    source_to_db[source_name] = db_module_template.format(module)
    print('-', source_name)
 print()
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 step_timer = StepTimer()
 constants = {}
 ```
 %% Cell type:markdown id: tags:
 # Offset map
 %% Cell type:code id: tags:
 ``` python
 for source, aggr in sources.items():
    display(Markdown(f"## {source}"))
    # read
    step_timer.start()
    dark_dc = RunDirectory(f"{in_folder}/r{dark_run:04d}",
                           include=f"RAW-R{dark_run:04d}-{aggr}-S*.h5")
    dark_dc = dark_dc.select([(source, image_key)])
    key_data = dark_dc[source][image_key]
    images_dark = key_data.ndarray()
    ntrain, npulse, ny, nx = images_dark.shape
    print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
    print(f"Image size: {ny} x {nx} px")
    step_timer.done_step("Read dark images")
    # process
    step_timer.start()
    dark = dffc.process_dark(images_dark)
    module_constants = constants.setdefault(source, {})
    module_constants["Offset"] = dark
    step_timer.done_step("Process dark images")
    display()
    # draw plots
    step_timer.start()
    plot_camera_image(dark)
    plt.show()
    step_timer.done_step("Draw offsets")
 ```
 %% Cell type:markdown id: tags:
 # Flat-field PCA decomposition
 %% Cell type:code id: tags:
 ``` python
 for source, aggr in sources.items():
    display(Markdown(f"## {source}"))
    # read
    step_timer.start()
    flat_dc = RunDirectory(f"{in_folder}/r{flat_run:04d}",
                           include=f"RAW-R{flat_run:04d}-{aggr}-S*.h5")
    flat_dc = flat_dc.select([(source, image_key)])
    key_data = flat_dc[source][image_key]
    images_flat = key_data.ndarray()
    ntrain, npulse, ny, nx = images_flat.shape
    print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
    print(f"Image size: {ny} x {nx} px")
    step_timer.done_step("Read flat-field images")
    # process
    step_timer.start()
    flat, components, explained_variance_ratio = dffc.process_flat(
        images_flat, dark, n_components)
    module_constants = constants.setdefault(source, {})
    module_constants["DynamicFF"] = np.concatenate([flat[None, ...], components])
    step_timer.done_step("Process flat-field images")
    # draw plots
    step_timer.start()
    display(Markdown("### Average flat-field"))
    plot_camera_image(flat)
    plt.show()
    display(Markdown("### Explained variance ratio"))
    fig, ax = plt.subplots(1, 1, figsize=(10,4), tight_layout=True)
    ax.semilogy(explained_variance_ratio, 'o')
    ax.set_xticks(np.arange(len(explained_variance_ratio)))
    ax.set_xlabel("Component no.")
    ax.set_ylabel("Variance fraction")
    plt.show()
    display(Markdown("### The first principal components (up to 20)"))
    plot_images(components[:20], figsize=(13, 8))
    plt.show()
    step_timer.done_step("Draw flat-field")
 ```
 %% Cell type:markdown id: tags:
 ## Calibration constants
 %% Cell type:code id: tags:
 ``` python
 step_timer.start()
 # Output Folder Creation:
 os.makedirs(out_folder, exist_ok=True)
 for source, module_constants  in constants.items():
    for constant_name, data in module_constants.items():
        db_module = source_to_db[source]
        conditions = {
            'Frame Size': {'value': 1.0},
        }
        data_to_store = {
            'condition': conditions,
            'db_module': db_module,
            'karabo_id': karabo_id,
            'constant': constant_name,
            'data': data,
            'creation_time': creation_time.replace(microsecond=0),
            'file_loc': file_loc,
            'report': report,
        }
        ofile = f"{out_folder}/const_{constant_name}_{db_module}.h5"
        if os.path.isfile(ofile):
            print(f'File {ofile} already exists and will be overwritten')
        save_dict_to_hdf5(data_to_store, ofile)
 step_timer.done_step("Storing calibration constants")
 ```
 %% Cell type:code id: tags:
 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

--- a/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb
+++ b/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb
@@ -67,9 +67,11 @@
    "from cal_tools.step_timing import StepTimer\n",
    "from cal_tools.files import sequence_trains, DataFile\n",
    "\n",
-    "from dffc.correction import DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection\n",
+    "from dynflatfield import (\n",
-    "from dffc.offline import FlatFieldCorrectionFileProcessor\n",
+    "    DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,\n",
-    "from dffc.draw import plot_images, plot_camera_image"
+    "    FlatFieldCorrectionFileProcessor\n",
+    ")\n",
+    "from dynflatfield.draw import plot_images, plot_camera_image"
   ]
  },
  {

 %% Cell type:markdown id: tags:
 # Dynamic Flat-field Offline Correction
 Author: Egor Sobolev
 Offline dynamic flat-field correction
 %% Cell type:code id: tags:
 ``` python
 in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw"  # input folder, required
 out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu'  # output folder, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 run = 3  # which run to read data from, required
 # Data files parameters.
 karabo_da = ['HPVX01/1', 'HPVX01/2']  # data aggregators
 karabo_id = "SPB_EHD_MIC"  # karabo prefix of Shimadzu HPV-X2 devices
 #receiver_id = "PNCCD_FMT-0" # inset for receiver devices
 #path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # the template to use to access data
 instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput'  # data source path in h5file.
 image_key = "data.image.pixels"  # image data key in Karabo or exdf notation
 # Database access parameters.
 use_dir_creation_date = True  # use dir creation date as data production reference date
 cal_db_interface = "tcp://max-exfl-cal001:8021"  # calibration DB interface to use
 cal_db_timeout = 300000  # timeout on caldb requests
 db_output = False  # if True, the notebook sends dark constants to the calibration database
 local_output = True  # if True, the notebook saves dark constants locally
 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. 2019-07-04 11:02:41.00
 n_components = 20  # number of principal components of flat-field to use in correction
 downsample_factors = [1, 1]  # list of downsample factors for each image dimention (y, x)
 constants_folder = "/gpfs/exfel/data/scratch/esobolev/test/shimadzu"
 db_module_template = "Shimadzu_HPVX2_{}"
 num_proc = 32  # number of processes running correction in parallel
 corrected_source_template = 'SPB_EHD_MIC/CORR/HPVX2_{module}:output'  # data source path in h5file.
 ```
 %% Cell type:code id: tags:
 ``` python
 import os
 import h5py
 import numpy as np
 import matplotlib.pyplot as plt
 from IPython.display import display, Markdown
 from extra_data import RunDirectory, by_id
 %matplotlib inline
 from cal_tools.step_timing import StepTimer
 from cal_tools.files import sequence_trains, DataFile
-from dffc.correction import DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection
+from dynflatfield import (
-from dffc.offline import FlatFieldCorrectionFileProcessor
+    DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,
-from dffc.draw import plot_images, plot_camera_image
+    FlatFieldCorrectionFileProcessor
+)
+from dynflatfield.draw import plot_images, plot_camera_image
 ```
 %% Cell type:code id: tags:
 ``` python
 index_group = image_key.partition('.')[0]
 instrument, part, component = karabo_id.split('_')
 aggregators = {}
 sources = {}
 source_to_db = {}
 print("Sources:")
 for da in karabo_da:
    aggr, _, module = da.partition('/')
    instrument_source_name = instrument_source_template.format(
        instrument=instrument, part=part, component=component,
        module=module
    )
    corrected_source_name = corrected_source_template.format(
        instrument=instrument, part=part, component=component,
        module=module
    )
    aggregators.setdefault(aggr, []).append(
        (instrument_source_name, corrected_source_name))
    sources[instrument_source_name] = aggr
    source_to_db[instrument_source_name] = db_module_template.format(module)
    print('-', instrument_source_name)
 print()
 print(f"Detector in use is {karabo_id}")
 print(f"Instrument {instrument}")
 step_timer = StepTimer()
 ```
 %% Cell type:markdown id: tags:
 # Calibration constants
 %% Cell type:code id: tags:
 ``` python
 requested_conditions = {
    "frame_size": 1.0,
 }
 step_timer.start()
 corrections = {}
 constant_types = ["Offset", "DynamicFF"]
 for source, db_module in source_to_db.items():
    constants = {}
    for constant_name in constant_types:
        const_file = f"{constants_folder}/const_{constant_name}_{db_module}.h5"
        if not os.path.isfile(const_file):
            raise FileNotFoundError(f"{constant_name} constants are not found for {karabo_id}.")
        with h5py.File(const_file, 'r') as f:
            conditions = dict(
                frame_size=int(f["condition/Frame Size/value"][()])
            )
            data = f["data"][:]
            data_creation_time = f["creation_time"][()].decode()
        if not all(conditions[key] == value for key, value in requested_conditions.items()):
            raise ValueError("Conditions for {constant_name} are not match")
        print(f"{source} {db_module} {constant_name}: {data_creation_time}")
        constants[constant_name] = data
    dark = constants["Offset"]
    flat = constants["DynamicFF"][0]
    components = constants["DynamicFF"][1:][:n_components]
    dffc = DynamicFlatFieldCorrection.from_constants(
        dark, flat, components, downsample_factors)
    corrections[source] = dffc
 step_timer.done_step("Load calibration constants")
 ```
 %% Cell type:markdown id: tags:
 # Correction
 %% Cell type:code id: tags:
 ``` python
 report = []
 for aggr, sources in aggregators.items():
    dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{aggr}-S*.h5")
    train_ids = set()
    keydata_cache = {}
    for instrument_source, corrected_source in sources:
        keydata = dc[instrument_source][image_key].drop_empty_trains()
        train_ids.update(keydata.train_ids)
        keydata_cache[instrument_source] = keydata
    train_ids = np.array(sorted(train_ids))
    ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)
    for seq_id, train_mask in sequence_trains(train_ids, 200):
        step_timer.start()
        print('* sequience', seq_id)
        seq_train_ids = train_ids[train_mask]
        seq_timestamps = ts[train_mask]
        dc_seq = dc.select_trains(by_id[seq_train_ids])
        ntrains = len(seq_train_ids)
        # create output file
        channels = [f"{s[1]}/{index_group}" for s in sources]
        f = DataFile.from_details(out_folder, aggr, run, seq_id)
        f.create_metadata(like=dc, instrument_channels=channels)
        f.create_index(seq_train_ids, timestamps=seq_timestamps)
        seq_report = {}
        image_datasets = {}
        for instrument_source, corrected_source in sources:
            keydata = dc_seq[instrument_source][image_key].drop_empty_trains()
            count = keydata.data_counts()
            i = np.flatnonzero(count.values)
            raw_images = keydata.select_trains(np.s_[i]).ndarray()
            # not pulse resolved
            shape = keydata.shape
            count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)
            src = f.create_instrument_source(corrected_source)
            src.create_index(index_group=count)
            ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)
            image_datasets[corrected_source] = ds_data
        step_timer.done_step("Create output file")
        for instrument_source, corrected_source in sources:
            step_timer.start()
            dc_seq = dc.select_trains(by_id[seq_train_ids])
            dffc = corrections[instrument_source]
            proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)
            proc.start_workers()
            proc.run(dc_seq)
            proc.join_workers()
            # not pulse resolved
            corrected_images = np.stack(proc.rdr.results, 0)
            image_datasets[corrected_source][:] = corrected_images
            seq_report[instrument_source] = (raw_images[0, 0], corrected_images[:20, 0])
            step_timer.done_step("Correct flat-field")
        f.close()
        report.append(seq_report)
 ```
 %% Cell type:code id: tags:
 ``` python
 step_timer.start()
 for source, (raw_image, corrected_images) in report[0].items():
    display(Markdown(f"# {source}"))
    display(Markdown("## The first raw image"))
    plot_camera_image(raw_images[0, 0])
    plt.show()
    display(Markdown("## The first corrected image"))
    plot_camera_image(corrected_images[0])
    plt.show()
    display(Markdown("## The first corrected images in the trains (up to 20)"))
    plot_images(corrected_images, figsize=(13, 8))
    plt.show()
 step_timer.done_step("Draw images")
 ```
 %% Cell type:code id: tags:
 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```