Skip to content
Snippets Groups Projects
Commit baf26468 authored by Thomas Kluyver's avatar Thomas Kluyver
Browse files

Fix name of index group for corrected Shimadzu files

parent 71673f09
No related branches found
No related tags found
1 merge request!997[Shimadzu] [CORRECT] Fix name of index group in output files
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Dynamic Flat-field Offline Correction # Dynamic Flat-field Offline Correction
Author: Egor Sobolev Author: Egor Sobolev
Offline dynamic flat-field correction Offline dynamic flat-field correction
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder ="/gpfs/exfel/exp/SPB/202430/p900425/scratch/proc/r0003" # output folder, required out_folder ="/gpfs/exfel/exp/SPB/202430/p900425/scratch/proc/r0003" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 3 # which run to read data from, required run = 3 # which run to read data from, required
# Data files parameters. # Data files parameters.
karabo_da = ['-1'] # data aggregators karabo_da = ['-1'] # data aggregators
karabo_id = "SPB_MIC_HPVX2" # karabo prefix of Shimadzu HPV-X2 devices karabo_id = "SPB_MIC_HPVX2" # karabo prefix of Shimadzu HPV-X2 devices
# Database access parameters. # Database access parameters.
cal_db_interface = "tcp://max-exfl-cal001:8021" # Unused, calibration DB interface to use cal_db_interface = "tcp://max-exfl-cal001:8021" # Unused, calibration DB interface to use
cal_db_timeout = 30000 # Unused, calibration DB timeout cal_db_timeout = 30000 # Unused, calibration DB timeout
# Correction parameters # Correction parameters
n_components = 20 # number of principal components of flat-field to use in correction n_components = 20 # number of principal components of flat-field to use in correction
downsample_factors = [1, 1] # list of downsample factors for each image dimention (y, x) downsample_factors = [1, 1] # list of downsample factors for each image dimention (y, x)
num_proc = 32 # number of processes running correction in parallel num_proc = 32 # number of processes running correction in parallel
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import os import os
import h5py import h5py
import warnings import warnings
from logging import warning from logging import warning
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from IPython.display import display, Markdown from IPython.display import display, Markdown
from datetime import datetime from datetime import datetime
from extra_data import RunDirectory, by_id from extra_data import RunDirectory, by_id
%matplotlib inline %matplotlib inline
from cal_tools.step_timing import StepTimer from cal_tools.step_timing import StepTimer
from cal_tools.files import sequence_trains, DataFile from cal_tools.files import sequence_trains, DataFile
from cal_tools.tools import get_dir_creation_date from cal_tools.tools import get_dir_creation_date
from cal_tools.restful_config import calibration_client, extra_calibration_client from cal_tools.restful_config import calibration_client, extra_calibration_client
from cal_tools.calcat_interface2 import CalibrationData from cal_tools.calcat_interface2 import CalibrationData
from cal_tools.shimadzu import ShimadzuHPVX2 from cal_tools.shimadzu import ShimadzuHPVX2
from dynflatfield import ( from dynflatfield import (
DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection, DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,
FlatFieldCorrectionFileProcessor FlatFieldCorrectionFileProcessor
) )
from dynflatfield.draw import plot_images, plot_camera_image from dynflatfield.draw import plot_images, plot_camera_image
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
creation_time = get_dir_creation_date(in_folder, run) creation_time = get_dir_creation_date(in_folder, run)
print(f"Creation time is {creation_time}") print(f"Creation time is {creation_time}")
extra_calibration_client() # Configure CalibrationData API. extra_calibration_client() # Configure CalibrationData API.
cc = calibration_client() cc = calibration_client()
pdus = cc.get_all_phy_det_units_from_detector( pdus = cc.get_all_phy_det_units_from_detector(
{"detector_identifier": karabo_id}) # TODO: Use creation_time for snapshot_at {"detector_identifier": karabo_id}) # TODO: Use creation_time for snapshot_at
if not pdus["success"]: if not pdus["success"]:
raise ValueError("Failed to retrieve PDUs") raise ValueError("Failed to retrieve PDUs")
detector_info = pdus['data'][0]['detector'] detector_info = pdus['data'][0]['detector']
detector = ShimadzuHPVX2(detector_info["source_name_pattern"]) detector = ShimadzuHPVX2(detector_info["source_name_pattern"])
index_group = detector.image_index_group index_group = detector.image_index_group
image_key = detector.image_key image_key = detector.image_key
print(f"Instrument {detector.instrument}") print(f"Instrument {detector.instrument}")
print(f"Detector in use is {karabo_id}") print(f"Detector in use is {karabo_id}")
modules = {} modules = {}
for pdu in pdus["data"]: for pdu in pdus["data"]:
db_module = pdu["physical_name"] db_module = pdu["physical_name"]
module = pdu["module_number"] module = pdu["module_number"]
da = pdu["karabo_da"] da = pdu["karabo_da"]
if karabo_da[0] != "-1" and da not in karabo_da: if karabo_da[0] != "-1" and da not in karabo_da:
continue continue
instrument_source_name = detector.instrument_source(module) instrument_source_name = detector.instrument_source(module)
corrected_source_name = detector.corrected_source(module) corrected_source_name = detector.corrected_source(module)
print('-', da, db_module, module, instrument_source_name) print('-', da, db_module, module, instrument_source_name)
modules[da] = dict( modules[da] = dict(
db_module=db_module, db_module=db_module,
module=module, module=module,
raw_source_name=instrument_source_name, raw_source_name=instrument_source_name,
corrected_source_name=corrected_source_name, corrected_source_name=corrected_source_name,
) )
step_timer = StepTimer() step_timer = StepTimer()
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Calibration constants # Calibration constants
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
step_timer.start() step_timer.start()
dc = RunDirectory(f"{in_folder}/r{run:04d}") dc = RunDirectory(f"{in_folder}/r{run:04d}")
conditions = detector.conditions(dc) conditions = detector.conditions(dc)
caldata = CalibrationData.from_condition( caldata = CalibrationData.from_condition(
conditions, 'SPB_MIC_HPVX2', event_at=creation_time) conditions, 'SPB_MIC_HPVX2', event_at=creation_time)
aggregators = {} aggregators = {}
corrections = {} corrections = {}
for da in modules: for da in modules:
try: try:
dark = caldata["Offset", da].ndarray() dark = caldata["Offset", da].ndarray()
flat = caldata["DynamicFF", da].ndarray() flat = caldata["DynamicFF", da].ndarray()
components = flat[1:][:n_components] components = flat[1:][:n_components]
flat = flat[0] flat = flat[0]
dffc = DynamicFlatFieldCorrection.from_constants( dffc = DynamicFlatFieldCorrection.from_constants(
dark, flat, components, downsample_factors) dark, flat, components, downsample_factors)
corrections[da] = dffc corrections[da] = dffc
file_da, _, _ = da.partition('/') file_da, _, _ = da.partition('/')
aggregators.setdefault(file_da, []).append(da) aggregators.setdefault(file_da, []).append(da)
except (KeyError, FileNotFoundError): except (KeyError, FileNotFoundError):
warning(f"Constants are not found for module {da}. " warning(f"Constants are not found for module {da}. "
"The module will not calibrated") "The module will not calibrated")
step_timer.done_step("Load calibration constants") step_timer.done_step("Load calibration constants")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Correction # Correction
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Output Folder Creation: # Output Folder Creation:
os.makedirs(out_folder, exist_ok=True) os.makedirs(out_folder, exist_ok=True)
report = [] report = []
for file_da, file_modules in aggregators.items(): for file_da, file_modules in aggregators.items():
dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{file_da}-S*.h5") dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{file_da}-S*.h5")
# build train IDs # build train IDs
train_ids = set() train_ids = set()
process_modules = [] process_modules = []
for da in file_modules: for da in file_modules:
instrument_source = modules[da]["raw_source_name"] instrument_source = modules[da]["raw_source_name"]
if instrument_source in dc.all_sources: if instrument_source in dc.all_sources:
keydata = dc[instrument_source][image_key].drop_empty_trains() keydata = dc[instrument_source][image_key].drop_empty_trains()
train_ids.update(keydata.train_ids) train_ids.update(keydata.train_ids)
process_modules.append(da) process_modules.append(da)
else: else:
print(f"Source {instrument_source} for module {da} is missed") print(f"Source {instrument_source} for module {da} is missed")
train_ids = np.array(sorted(train_ids)) train_ids = np.array(sorted(train_ids))
ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64) ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)
# correct and write sequence files # correct and write sequence files
for seq_id, train_mask in sequence_trains(train_ids, 200): for seq_id, train_mask in sequence_trains(train_ids, 200):
step_timer.start() step_timer.start()
print('* sequence', seq_id) print('* sequence', seq_id)
seq_train_ids = train_ids[train_mask] seq_train_ids = train_ids[train_mask]
seq_timestamps = ts[train_mask] seq_timestamps = ts[train_mask]
dc_seq = dc.select_trains(by_id[seq_train_ids]) dc_seq = dc.select_trains(by_id[seq_train_ids])
ntrains = len(seq_train_ids) ntrains = len(seq_train_ids)
# create output file # create output file
channels = [f"{modules[da]['corrected_source_name']}/{index_group}" channels = [f"{modules[da]['corrected_source_name']}/{index_group}"
for da in process_modules] for da in process_modules]
f = DataFile.from_details(out_folder, file_da, run, seq_id) f = DataFile.from_details(out_folder, file_da, run, seq_id)
f.create_metadata(like=dc, instrument_channels=channels) f.create_metadata(like=dc, instrument_channels=channels)
f.create_index(seq_train_ids, timestamps=seq_timestamps) f.create_index(seq_train_ids, timestamps=seq_timestamps)
# create file structure # create file structure
seq_report = {} seq_report = {}
file_datasets = {} file_datasets = {}
for da in process_modules: for da in process_modules:
instrument_source = modules[da]["raw_source_name"] instrument_source = modules[da]["raw_source_name"]
keydata = dc_seq[instrument_source][image_key].drop_empty_trains() keydata = dc_seq[instrument_source][image_key].drop_empty_trains()
count = keydata.data_counts(labelled=False) count = keydata.data_counts(labelled=False)
i = np.flatnonzero(count) i = np.flatnonzero(count)
raw_images = keydata.select_trains(np.s_[i]).ndarray() raw_images = keydata.select_trains(np.s_[i]).ndarray()
# not pulse resolved # not pulse resolved
shape = keydata.shape shape = keydata.shape
count = np.in1d(seq_train_ids, keydata.train_ids).astype(int) count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)
corrected_source = modules[da]["corrected_source_name"] corrected_source = modules[da]["corrected_source_name"]
src = f.create_instrument_source(corrected_source) src = f.create_instrument_source(corrected_source)
src.create_index(index_group=count) src.create_index(**{index_group: count})
# create key for images # create key for images
ds_data = src.create_key(image_key, shape=shape, dtype=np.float32) ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)
module_datasets = {image_key: ds_data} module_datasets = {image_key: ds_data}
# create keys for image parameters # create keys for image parameters
for key in detector.copy_keys: for key in detector.copy_keys:
keydata = dc_seq[instrument_source][key].drop_empty_trains() keydata = dc_seq[instrument_source][key].drop_empty_trains()
module_datasets[key] = (keydata, src.create_key( module_datasets[key] = (keydata, src.create_key(
key, shape=keydata.shape, dtype=keydata.dtype)) key, shape=keydata.shape, dtype=keydata.dtype))
file_datasets[da] = module_datasets file_datasets[da] = module_datasets
step_timer.done_step("Create output file") step_timer.done_step("Create output file")
# correct and write data to file # correct and write data to file
for da in process_modules: for da in process_modules:
step_timer.start() step_timer.start()
dc_seq = dc.select_trains(by_id[seq_train_ids]) dc_seq = dc.select_trains(by_id[seq_train_ids])
dffc = corrections[da] dffc = corrections[da]
instrument_source = modules[da]["raw_source_name"] instrument_source = modules[da]["raw_source_name"]
proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key) proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)
proc.start_workers() proc.start_workers()
proc.run(dc_seq) proc.run(dc_seq)
proc.join_workers() proc.join_workers()
# not pulse resolved # not pulse resolved
corrected_images = np.stack(proc.rdr.results, 0) corrected_images = np.stack(proc.rdr.results, 0)
file_datasets[da][image_key][:] = corrected_images file_datasets[da][image_key][:] = corrected_images
# copy image parameters # copy image parameters
for key in detector.copy_keys: for key in detector.copy_keys:
keydata, ds = file_datasets[da][key] keydata, ds = file_datasets[da][key]
ds[:] = keydata.ndarray() ds[:] = keydata.ndarray()
seq_report[da] = (raw_images[0, 0], corrected_images[:20, 0]) seq_report[da] = (raw_images[0, 0], corrected_images[:20, 0])
step_timer.done_step("Correct flat-field") step_timer.done_step("Correct flat-field")
f.close() f.close()
report.append(seq_report) report.append(seq_report)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
step_timer.start() step_timer.start()
if report: if report:
for da, (raw_image, corrected_images) in report[0].items(): for da, (raw_image, corrected_images) in report[0].items():
source = modules[da]["raw_source_name"] source = modules[da]["raw_source_name"]
display(Markdown(f"## {source}")) display(Markdown(f"## {source}"))
display(Markdown("### The first raw image")) display(Markdown("### The first raw image"))
plot_camera_image(raw_images[0, 0]) plot_camera_image(raw_images[0, 0])
plt.show() plt.show()
display(Markdown("### The first corrected image")) display(Markdown("### The first corrected image"))
plot_camera_image(corrected_images[0]) plot_camera_image(corrected_images[0])
plt.show() plt.show()
display(Markdown("### The first corrected images in the trains (up to 20)")) display(Markdown("### The first corrected images in the trains (up to 20)"))
plot_images(corrected_images, figsize=(13, 8)) plot_images(corrected_images, figsize=(13, 8))
plt.show() plt.show()
step_timer.done_step("Draw images") step_timer.done_step("Draw images")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
print(f"Total processing time {step_timer.timespan():.01f} s") print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary() step_timer.print_summary()
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment