Skip to content
Snippets Groups Projects
Commit ffcdffb9 authored by Egor Sobolev's avatar Egor Sobolev
Browse files

Change name of dynamic flat-field correction package

parent 7daba04f
No related branches found
No related tags found
No related merge requests found
This commit is part of merge request !939. Comments created here will be created in the context of that merge request.
%% Cell type:markdown id: tags:
# Characterization of dark and flat field for Dynamic Flat Field correction
Author: Egor Sobolev
Computation of dark offsets and flat-field principal components
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu' # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
dark_run = 1 # which run to read data from, required
flat_run = 2 # which run to read
# Data files parameters.
karabo_da = ['HPVX01/1', 'HPVX01/2'] # data aggregators
karabo_id = "SPB_EHD_MIC" # karabo prefix of Shimadzu HPV-X2 devices
#receiver_id = "PNCCD_FMT-0" # inset for receiver devices
#path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput' # data source path in h5file.
#instrument_source_template = 'SPB_EHD_HPVX2_{module}/CAM/CAMERA:daqOutput'
image_key = "data.image.pixels" # image data key in Karabo or exdf notation
db_module_template = "Shimadzu_HPVX2_{}"
# Database access parameters.
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl-cal001:8021" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # if True, the notebook sends dark constants to the calibration database
local_output = True # if True, the notebook saves dark constants locally
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. 2019-07-04 11:02:41.00
n_components = 50 # Number of principal components to compute
```
%% Cell type:code id: tags:
``` python
import datetime
import os
import warnings
warnings.filterwarnings('ignore')
import time
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from extra_data import RunDirectory
%matplotlib inline
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
save_dict_to_hdf5,
send_to_db,
run_prop_seq_from_path,
)
import dffc
from dffc.draw import plot_images, plot_camera_image
import dynflatfield as dffc
from dynflatfield.draw import plot_images, plot_camera_image
```
%% Cell type:code id: tags:
``` python
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, max(dark_run, flat_run))
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
file_loc = f'proposal: {prop}, runs: {dark_run} {flat_run}'
# Read report path and create file location tuple to add with the injection
file_loc = f"proposal:{prop} runs:{dark_run} {flat_run}"
report = get_report(metadata_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
print()
instrument, part, component = karabo_id.split('_')
sources = {}
source_to_db = {}
print("Sources:")
for da in karabo_da:
aggr, _, module = da.partition('/')
source_name = instrument_source_template.format(
instrument=instrument, part=part, component=component,
module=module
)
sources[source_name] = aggr
source_to_db[source_name] = db_module_template.format(module)
print('-', source_name)
print()
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
step_timer = StepTimer()
constants = {}
```
%% Cell type:markdown id: tags:
# Offset map
%% Cell type:code id: tags:
``` python
for source, aggr in sources.items():
display(Markdown(f"## {source}"))
# read
step_timer.start()
dark_dc = RunDirectory(f"{in_folder}/r{dark_run:04d}",
include=f"RAW-R{dark_run:04d}-{aggr}-S*.h5")
dark_dc = dark_dc.select([(source, image_key)])
key_data = dark_dc[source][image_key]
images_dark = key_data.ndarray()
ntrain, npulse, ny, nx = images_dark.shape
print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
print(f"Image size: {ny} x {nx} px")
step_timer.done_step("Read dark images")
# process
step_timer.start()
dark = dffc.process_dark(images_dark)
module_constants = constants.setdefault(source, {})
module_constants["Offset"] = dark
step_timer.done_step("Process dark images")
display()
# draw plots
step_timer.start()
plot_camera_image(dark)
plt.show()
step_timer.done_step("Draw offsets")
```
%% Cell type:markdown id: tags:
# Flat-field PCA decomposition
%% Cell type:code id: tags:
``` python
for source, aggr in sources.items():
display(Markdown(f"## {source}"))
# read
step_timer.start()
flat_dc = RunDirectory(f"{in_folder}/r{flat_run:04d}",
include=f"RAW-R{flat_run:04d}-{aggr}-S*.h5")
flat_dc = flat_dc.select([(source, image_key)])
key_data = flat_dc[source][image_key]
images_flat = key_data.ndarray()
ntrain, npulse, ny, nx = images_flat.shape
print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
print(f"Image size: {ny} x {nx} px")
step_timer.done_step("Read flat-field images")
# process
step_timer.start()
flat, components, explained_variance_ratio = dffc.process_flat(
images_flat, dark, n_components)
module_constants = constants.setdefault(source, {})
module_constants["DynamicFF"] = np.concatenate([flat[None, ...], components])
step_timer.done_step("Process flat-field images")
# draw plots
step_timer.start()
display(Markdown("### Average flat-field"))
plot_camera_image(flat)
plt.show()
display(Markdown("### Explained variance ratio"))
fig, ax = plt.subplots(1, 1, figsize=(10,4), tight_layout=True)
ax.semilogy(explained_variance_ratio, 'o')
ax.set_xticks(np.arange(len(explained_variance_ratio)))
ax.set_xlabel("Component no.")
ax.set_ylabel("Variance fraction")
plt.show()
display(Markdown("### The first principal components (up to 20)"))
plot_images(components[:20], figsize=(13, 8))
plt.show()
step_timer.done_step("Draw flat-field")
```
%% Cell type:markdown id: tags:
## Calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
# Output Folder Creation:
os.makedirs(out_folder, exist_ok=True)
for source, module_constants in constants.items():
for constant_name, data in module_constants.items():
db_module = source_to_db[source]
conditions = {
'Frame Size': {'value': 1.0},
}
data_to_store = {
'condition': conditions,
'db_module': db_module,
'karabo_id': karabo_id,
'constant': constant_name,
'data': data,
'creation_time': creation_time.replace(microsecond=0),
'file_loc': file_loc,
'report': report,
}
ofile = f"{out_folder}/const_{constant_name}_{db_module}.h5"
if os.path.isfile(ofile):
print(f'File {ofile} already exists and will be overwritten')
save_dict_to_hdf5(data_to_store, ofile)
step_timer.done_step("Storing calibration constants")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......
%% Cell type:markdown id: tags:
# Dynamic Flat-field Offline Correction
Author: Egor Sobolev
Offline dynamic flat-field correction
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu' # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 3 # which run to read data from, required
# Data files parameters.
karabo_da = ['HPVX01/1', 'HPVX01/2'] # data aggregators
karabo_id = "SPB_EHD_MIC" # karabo prefix of Shimadzu HPV-X2 devices
#receiver_id = "PNCCD_FMT-0" # inset for receiver devices
#path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput' # data source path in h5file.
image_key = "data.image.pixels" # image data key in Karabo or exdf notation
# Database access parameters.
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl-cal001:8021" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # if True, the notebook sends dark constants to the calibration database
local_output = True # if True, the notebook saves dark constants locally
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. 2019-07-04 11:02:41.00
n_components = 20 # number of principal components of flat-field to use in correction
downsample_factors = [1, 1] # list of downsample factors for each image dimention (y, x)
constants_folder = "/gpfs/exfel/data/scratch/esobolev/test/shimadzu"
db_module_template = "Shimadzu_HPVX2_{}"
num_proc = 32 # number of processes running correction in parallel
corrected_source_template = 'SPB_EHD_MIC/CORR/HPVX2_{module}:output' # data source path in h5file.
```
%% Cell type:code id: tags:
``` python
import os
import h5py
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from extra_data import RunDirectory, by_id
%matplotlib inline
from cal_tools.step_timing import StepTimer
from cal_tools.files import sequence_trains, DataFile
from dffc.correction import DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection
from dffc.offline import FlatFieldCorrectionFileProcessor
from dffc.draw import plot_images, plot_camera_image
from dynflatfield import (
DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,
FlatFieldCorrectionFileProcessor
)
from dynflatfield.draw import plot_images, plot_camera_image
```
%% Cell type:code id: tags:
``` python
index_group = image_key.partition('.')[0]
instrument, part, component = karabo_id.split('_')
aggregators = {}
sources = {}
source_to_db = {}
print("Sources:")
for da in karabo_da:
aggr, _, module = da.partition('/')
instrument_source_name = instrument_source_template.format(
instrument=instrument, part=part, component=component,
module=module
)
corrected_source_name = corrected_source_template.format(
instrument=instrument, part=part, component=component,
module=module
)
aggregators.setdefault(aggr, []).append(
(instrument_source_name, corrected_source_name))
sources[instrument_source_name] = aggr
source_to_db[instrument_source_name] = db_module_template.format(module)
print('-', instrument_source_name)
print()
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
step_timer = StepTimer()
```
%% Cell type:markdown id: tags:
# Calibration constants
%% Cell type:code id: tags:
``` python
requested_conditions = {
"frame_size": 1.0,
}
step_timer.start()
corrections = {}
constant_types = ["Offset", "DynamicFF"]
for source, db_module in source_to_db.items():
constants = {}
for constant_name in constant_types:
const_file = f"{constants_folder}/const_{constant_name}_{db_module}.h5"
if not os.path.isfile(const_file):
raise FileNotFoundError(f"{constant_name} constants are not found for {karabo_id}.")
with h5py.File(const_file, 'r') as f:
conditions = dict(
frame_size=int(f["condition/Frame Size/value"][()])
)
data = f["data"][:]
data_creation_time = f["creation_time"][()].decode()
if not all(conditions[key] == value for key, value in requested_conditions.items()):
raise ValueError("Conditions for {constant_name} are not match")
print(f"{source} {db_module} {constant_name}: {data_creation_time}")
constants[constant_name] = data
dark = constants["Offset"]
flat = constants["DynamicFF"][0]
components = constants["DynamicFF"][1:][:n_components]
dffc = DynamicFlatFieldCorrection.from_constants(
dark, flat, components, downsample_factors)
corrections[source] = dffc
step_timer.done_step("Load calibration constants")
```
%% Cell type:markdown id: tags:
# Correction
%% Cell type:code id: tags:
``` python
report = []
for aggr, sources in aggregators.items():
dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{aggr}-S*.h5")
train_ids = set()
keydata_cache = {}
for instrument_source, corrected_source in sources:
keydata = dc[instrument_source][image_key].drop_empty_trains()
train_ids.update(keydata.train_ids)
keydata_cache[instrument_source] = keydata
train_ids = np.array(sorted(train_ids))
ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)
for seq_id, train_mask in sequence_trains(train_ids, 200):
step_timer.start()
print('* sequience', seq_id)
seq_train_ids = train_ids[train_mask]
seq_timestamps = ts[train_mask]
dc_seq = dc.select_trains(by_id[seq_train_ids])
ntrains = len(seq_train_ids)
# create output file
channels = [f"{s[1]}/{index_group}" for s in sources]
f = DataFile.from_details(out_folder, aggr, run, seq_id)
f.create_metadata(like=dc, instrument_channels=channels)
f.create_index(seq_train_ids, timestamps=seq_timestamps)
seq_report = {}
image_datasets = {}
for instrument_source, corrected_source in sources:
keydata = dc_seq[instrument_source][image_key].drop_empty_trains()
count = keydata.data_counts()
i = np.flatnonzero(count.values)
raw_images = keydata.select_trains(np.s_[i]).ndarray()
# not pulse resolved
shape = keydata.shape
count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)
src = f.create_instrument_source(corrected_source)
src.create_index(index_group=count)
ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)
image_datasets[corrected_source] = ds_data
step_timer.done_step("Create output file")
for instrument_source, corrected_source in sources:
step_timer.start()
dc_seq = dc.select_trains(by_id[seq_train_ids])
dffc = corrections[instrument_source]
proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)
proc.start_workers()
proc.run(dc_seq)
proc.join_workers()
# not pulse resolved
corrected_images = np.stack(proc.rdr.results, 0)
image_datasets[corrected_source][:] = corrected_images
seq_report[instrument_source] = (raw_images[0, 0], corrected_images[:20, 0])
step_timer.done_step("Correct flat-field")
f.close()
report.append(seq_report)
```
%% Cell type:code id: tags:
``` python
step_timer.start()
for source, (raw_image, corrected_images) in report[0].items():
display(Markdown(f"# {source}"))
display(Markdown("## The first raw image"))
plot_camera_image(raw_images[0, 0])
plt.show()
display(Markdown("## The first corrected image"))
plot_camera_image(corrected_images[0])
plt.show()
display(Markdown("## The first corrected images in the trains (up to 20)"))
plot_images(corrected_images, figsize=(13, 8))
plt.show()
step_timer.done_step("Draw images")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment