Commit de7ef3e2 authored by Egor Sobolev, committed by Philipp Schmidt

Change name of dynamic flat-field correction package

parent a77f83e0
Merge request !939: [Generic][Shimadzu] Dynamic flat-field characterization and correction for MHz microscopy
%% Cell type:markdown id: tags:
# Characterization of dark and flat field for Dynamic Flat Field correction
Author: Egor Sobolev
Computation of dark offsets and flat-field principal components
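
In the usual dynamic flat-field formulation (a sketch of the assumed model; the exact implementation lives in the `dynflatfield` package), each raw image $I$ is corrected with an image-specific flat field reconstructed from the mean flat field $\bar f$ and the first $n$ principal components $f_k$ with per-image weights $w_k$:

$$
I_{\mathrm{corr}} = \frac{I - d}{\bar f + \sum_{k=1}^{n} w_k\, f_k},
$$

where $d$ is the dark offset computed from the dark run and the weights $w_k$ are fitted for every image individually. This notebook computes $d$, $\bar f$ and the $f_k$; the correction itself is applied in the companion offline-correction notebook.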
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu' # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
dark_run = 1 # which run to read data from, required
flat_run = 2 # which run to read flat-field data from, required
# Data files parameters.
karabo_da = ['HPVX01/1', 'HPVX01/2'] # data aggregators
karabo_id = "SPB_EHD_MIC" # karabo prefix of Shimadzu HPV-X2 devices
#receiver_id = "PNCCD_FMT-0" # inset for receiver devices
#path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput' # data source path in h5file.
#instrument_source_template = 'SPB_EHD_HPVX2_{module}/CAM/CAMERA:daqOutput'
image_key = "data.image.pixels" # image data key in Karabo or exdf notation
db_module_template = "Shimadzu_HPVX2_{}"
# Database access parameters.
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl-cal001:8021" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # if True, the notebook sends dark constants to the calibration database
local_output = True # if True, the notebook saves dark constants locally
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. 2019-07-04 11:02:41.00
n_components = 50 # Number of principal components to compute
```
%% Cell type:code id: tags:
``` python
import datetime
import os
import warnings
warnings.filterwarnings('ignore')
import time
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from extra_data import RunDirectory
%matplotlib inline
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_random_db_interface,
get_report,
save_const_to_h5,
save_dict_to_hdf5,
send_to_db,
run_prop_seq_from_path,
)
import dynflatfield as dffc
from dynflatfield.draw import plot_images, plot_camera_image
```
%% Cell type:code id: tags:
``` python
creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, max(dark_run, flat_run))
    print(f"Using {creation_time} as creation time of constant.")

run, prop, seq = run_prop_seq_from_path(in_folder)
# Read report path and create file location tuple to add with the injection
file_loc = f"proposal:{prop} runs:{dark_run} {flat_run}"
report = get_report(metadata_folder)

cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
print()

instrument, part, component = karabo_id.split('_')

sources = {}
source_to_db = {}
print("Sources:")
for da in karabo_da:
    aggr, _, module = da.partition('/')
    source_name = instrument_source_template.format(
        instrument=instrument, part=part, component=component,
        module=module
    )
    sources[source_name] = aggr
    source_to_db[source_name] = db_module_template.format(module)
    print('-', source_name)
print()

print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")

step_timer = StepTimer()
constants = {}
```
%% Cell type:markdown id: tags:
# Offset map
%% Cell type:code id: tags:
``` python
for source, aggr in sources.items():
    display(Markdown(f"## {source}"))

    # read
    step_timer.start()
    dark_dc = RunDirectory(f"{in_folder}/r{dark_run:04d}",
                           include=f"RAW-R{dark_run:04d}-{aggr}-S*.h5")
    dark_dc = dark_dc.select([(source, image_key)])
    key_data = dark_dc[source][image_key]

    images_dark = key_data.ndarray()
    ntrain, npulse, ny, nx = images_dark.shape

    print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
    print(f"Image size: {ny} x {nx} px")
    step_timer.done_step("Read dark images")

    # process
    step_timer.start()
    dark = dffc.process_dark(images_dark)

    module_constants = constants.setdefault(source, {})
    module_constants["Offset"] = dark
    step_timer.done_step("Process dark images")

    # draw plots
    step_timer.start()
    plot_camera_image(dark)
    plt.show()
    step_timer.done_step("Draw offsets")
```
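%% Cell type:markdown id: tags:
The dark processing above is delegated to `dffc.process_dark` from the `dynflatfield` package. As an illustration only, assuming the offset is the pixel-wise average over all dark frames, a minimal stand-in (hypothetical helper, not the package implementation) could look like this:
%% Cell type:code id: tags:
``` python
import numpy as np


def process_dark_sketch(images_dark: np.ndarray) -> np.ndarray:
    """Average dark frames over trains and pulses.

    `images_dark` has shape (ntrain, npulse, ny, nx); the returned
    offset map has shape (ny, nx).
    """
    frames = images_dark.reshape(-1, *images_dark.shape[-2:])
    return frames.mean(axis=0)
```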
%% Cell type:markdown id: tags:
# Flat-field PCA decomposition
%% Cell type:code id: tags:
``` python
for source, aggr in sources.items():
    display(Markdown(f"## {source}"))

    # read
    step_timer.start()
    flat_dc = RunDirectory(f"{in_folder}/r{flat_run:04d}",
                           include=f"RAW-R{flat_run:04d}-{aggr}-S*.h5")
    flat_dc = flat_dc.select([(source, image_key)])
    key_data = flat_dc[source][image_key]

    images_flat = key_data.ndarray()
    ntrain, npulse, ny, nx = images_flat.shape

    print(f"N image: {ntrain * npulse} (ntrain: {ntrain}, npulse: {npulse})")
    print(f"Image size: {ny} x {nx} px")
    step_timer.done_step("Read flat-field images")

    # process
    step_timer.start()
    dark = constants[source]["Offset"]  # dark offset computed for this module
    flat, components, explained_variance_ratio = dffc.process_flat(
        images_flat, dark, n_components)

    module_constants = constants.setdefault(source, {})
    module_constants["DynamicFF"] = np.concatenate([flat[None, ...], components])
    step_timer.done_step("Process flat-field images")

    # draw plots
    step_timer.start()
    display(Markdown("### Average flat-field"))
    plot_camera_image(flat)
    plt.show()

    display(Markdown("### Explained variance ratio"))
    fig, ax = plt.subplots(1, 1, figsize=(10, 4), tight_layout=True)
    ax.semilogy(explained_variance_ratio, 'o')
    ax.set_xticks(np.arange(len(explained_variance_ratio)))
    ax.set_xlabel("Component no.")
    ax.set_ylabel("Variance fraction")
    plt.show()

    display(Markdown("### The first principal components (up to 20)"))
    plot_images(components[:20], figsize=(13, 8))
    plt.show()
    step_timer.done_step("Draw flat-field")
```
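%% Cell type:markdown id: tags:
`dffc.process_flat` returns the average flat field, the principal components and their explained variance ratios. A rough sketch of such a decomposition, assuming a plain PCA over dark-subtracted flat frames (hypothetical stand-in, not the `dynflatfield` implementation):
%% Cell type:code id: tags:
``` python
import numpy as np
from sklearn.decomposition import PCA


def process_flat_sketch(images_flat, dark, n_components):
    """Decompose dark-subtracted flat-field frames into principal components."""
    frames = images_flat.reshape(-1, *images_flat.shape[-2:]) - dark[None, ...]
    ny, nx = frames.shape[-2:]

    pca = PCA(n_components=n_components)
    pca.fit(frames.reshape(len(frames), -1))

    flat = frames.mean(axis=0)  # average dark-subtracted flat field
    components = pca.components_.reshape(n_components, ny, nx)
    return flat, components, pca.explained_variance_ratio_
```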
%% Cell type:markdown id: tags:
## Calibration constants
%% Cell type:code id: tags:
``` python
step_timer.start()
# create output folder
os.makedirs(out_folder, exist_ok=True)

for source, module_constants in constants.items():
    for constant_name, data in module_constants.items():
        db_module = source_to_db[source]

        conditions = {
            'Frame Size': {'value': 1.0},
        }
        data_to_store = {
            'condition': conditions,
            'db_module': db_module,
            'karabo_id': karabo_id,
            'constant': constant_name,
            'data': data,
            'creation_time': creation_time.replace(microsecond=0),
            'file_loc': file_loc,
            'report': report,
        }

        ofile = f"{out_folder}/const_{constant_name}_{db_module}.h5"
        if os.path.isfile(ofile):
            print(f'File {ofile} already exists and will be overwritten')
        save_dict_to_hdf5(data_to_store, ofile)

step_timer.done_step("Storing calibration constants")
```
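%% Cell type:markdown id: tags:
The nested dictionary above is written by `save_dict_to_hdf5`, so the groups and datasets of the output file mirror its keys (the same layout is read back in the correction notebook). A quick way to inspect one of the stored files (the file name below is just an example following the `const_{constant_name}_{db_module}.h5` template):
%% Cell type:code id: tags:
``` python
import h5py

# Example: the Offset constant of the first module.
with h5py.File(f"{out_folder}/const_Offset_Shimadzu_HPVX2_1.h5", "r") as f:
    print("Frame Size condition:", f["condition/Frame Size/value"][()])
    print("Creation time:", f["creation_time"][()].decode())
    print("Constant shape:", f["data"].shape)
```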
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......
%% Cell type:markdown id: tags:
# Dynamic Flat-field Offline Correction
Author: Egor Sobolev
Offline dynamic flat-field correction
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202430/p900425/raw" # input folder, required
out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu' # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 3 # which run to read data from, required
# Data files parameters.
karabo_da = ['HPVX01/1', 'HPVX01/2'] # data aggregators
karabo_id = "SPB_EHD_MIC" # karabo prefix of Shimadzu HPV-X2 devices
#receiver_id = "PNCCD_FMT-0" # inset for receiver devices
#path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput' # data source path in h5file.
image_key = "data.image.pixels" # image data key in Karabo or exdf notation
# Database access parameters.
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl-cal001:8021" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # if True, the notebook sends dark constants to the calibration database
local_output = True # if True, the notebook saves dark constants locally
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC.00 e.g. 2019-07-04 11:02:41.00
n_components = 20 # number of principal components of flat-field to use in correction
downsample_factors = [1, 1] # list of downsample factors for each image dimension (y, x)
constants_folder = "/gpfs/exfel/data/scratch/esobolev/test/shimadzu"
db_module_template = "Shimadzu_HPVX2_{}"
num_proc = 32 # number of processes running correction in parallel
corrected_source_template = 'SPB_EHD_MIC/CORR/HPVX2_{module}:output' # data source path in h5file.
```
%% Cell type:code id: tags:
``` python
import os
import h5py
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from extra_data import RunDirectory, by_id
%matplotlib inline
from cal_tools.step_timing import StepTimer
from cal_tools.files import sequence_trains, DataFile
from dynflatfield import (
DynamicFlatFieldCorrectionCython as DynamicFlatFieldCorrection,
FlatFieldCorrectionFileProcessor
)
from dynflatfield.draw import plot_images, plot_camera_image
```
%% Cell type:code id: tags:
``` python
index_group = image_key.partition('.')[0]
instrument, part, component = karabo_id.split('_')

aggregators = {}
sources = {}
source_to_db = {}
print("Sources:")
for da in karabo_da:
    aggr, _, module = da.partition('/')
    instrument_source_name = instrument_source_template.format(
        instrument=instrument, part=part, component=component,
        module=module
    )
    corrected_source_name = corrected_source_template.format(
        instrument=instrument, part=part, component=component,
        module=module
    )
    aggregators.setdefault(aggr, []).append(
        (instrument_source_name, corrected_source_name))

    sources[instrument_source_name] = aggr
    source_to_db[instrument_source_name] = db_module_template.format(module)
    print('-', instrument_source_name)
print()

print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")

step_timer = StepTimer()
```
%% Cell type:markdown id: tags:
# Calibration constants
%% Cell type:code id: tags:
``` python
requested_conditions = {
    "frame_size": 1.0,
}

step_timer.start()

corrections = {}
constant_types = ["Offset", "DynamicFF"]
for source, db_module in source_to_db.items():
    constants = {}
    for constant_name in constant_types:
        const_file = f"{constants_folder}/const_{constant_name}_{db_module}.h5"
        if not os.path.isfile(const_file):
            raise FileNotFoundError(f"{constant_name} constants are not found for {karabo_id}.")

        with h5py.File(const_file, 'r') as f:
            conditions = dict(
                frame_size=int(f["condition/Frame Size/value"][()])
            )
            data = f["data"][:]
            data_creation_time = f["creation_time"][()].decode()

        if not all(conditions[key] == value for key, value in requested_conditions.items()):
            raise ValueError(f"Conditions for {constant_name} do not match the requested ones")

        print(f"{source} {db_module} {constant_name}: {data_creation_time}")
        constants[constant_name] = data

    dark = constants["Offset"]
    flat = constants["DynamicFF"][0]
    components = constants["DynamicFF"][1:][:n_components]

    dffc = DynamicFlatFieldCorrection.from_constants(
        dark, flat, components, downsample_factors)
    corrections[source] = dffc

step_timer.done_step("Load calibration constants")
```
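%% Cell type:markdown id: tags:
The `DynamicFlatFieldCorrection` objects built above are applied to every frame in the correction step below via `FlatFieldCorrectionFileProcessor` worker processes. For a single frame the underlying operation can be sketched as follows, assuming the standard dynamic flat-field scheme of fitting per-image component weights by least squares (a hypothetical stand-in for the actual implementation; the downsampling step is omitted):
%% Cell type:code id: tags:
``` python
import numpy as np


def correct_frame_sketch(raw, dark, flat, components):
    """Dynamic flat-field correction of a single frame.

    `raw`, `dark`, `flat`: 2D arrays (ny, nx); `components`: (n, ny, nx).
    """
    signal = raw.astype(np.float64) - dark

    # Fit the per-image weights of the flat-field components by least squares
    # on the deviation of the dark-subtracted frame from the mean flat field.
    A = components.reshape(len(components), -1).T  # (npix, n)
    b = (signal - flat).ravel()
    w, *_ = np.linalg.lstsq(A, b, rcond=None)

    # Reconstruct the image-specific flat field and normalize.
    dynamic_flat = flat + np.tensordot(w, components, axes=1)
    return signal / dynamic_flat
```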
%% Cell type:markdown id: tags:
# Correction
%% Cell type:code id: tags:
``` python
report = []

for aggr, sources in aggregators.items():
    dc = RunDirectory(f"{in_folder}/r{run:04d}", f"RAW-R{run:04d}-{aggr}-S*.h5")

    train_ids = set()
    keydata_cache = {}
    for instrument_source, corrected_source in sources:
        keydata = dc[instrument_source][image_key].drop_empty_trains()
        train_ids.update(keydata.train_ids)
        keydata_cache[instrument_source] = keydata
    train_ids = np.array(sorted(train_ids))

    ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)

    for seq_id, train_mask in sequence_trains(train_ids, 200):
        step_timer.start()
        print('* sequence', seq_id)
        seq_train_ids = train_ids[train_mask]
        seq_timestamps = ts[train_mask]
        dc_seq = dc.select_trains(by_id[seq_train_ids])
        ntrains = len(seq_train_ids)

        # create output file
        channels = [f"{s[1]}/{index_group}" for s in sources]
        f = DataFile.from_details(out_folder, aggr, run, seq_id)
        f.create_metadata(like=dc, instrument_channels=channels)
        f.create_index(seq_train_ids, timestamps=seq_timestamps)

        seq_report = {}
        image_datasets = {}
        for instrument_source, corrected_source in sources:
            keydata = dc_seq[instrument_source][image_key].drop_empty_trains()

            count = keydata.data_counts()
            i = np.flatnonzero(count.values)
            raw_images = keydata.select_trains(np.s_[i]).ndarray()

            # not pulse resolved
            shape = keydata.shape
            count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)

            src = f.create_instrument_source(corrected_source)
            src.create_index(index_group=count)
            ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)
            image_datasets[corrected_source] = ds_data
        step_timer.done_step("Create output file")

        for instrument_source, corrected_source in sources:
            step_timer.start()
            dc_seq = dc.select_trains(by_id[seq_train_ids])

            dffc = corrections[instrument_source]
            proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)

            proc.start_workers()
            proc.run(dc_seq)
            proc.join_workers()

            # not pulse resolved
            corrected_images = np.stack(proc.rdr.results, 0)
            image_datasets[corrected_source][:] = corrected_images

            seq_report[instrument_source] = (raw_images[0, 0], corrected_images[:20, 0])
            step_timer.done_step("Correct flat-field")

        f.close()
        report.append(seq_report)
```
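%% Cell type:markdown id: tags:
As a quick sanity check, the corrected sequence files written above can be opened again with `extra_data` (the `CORR-` file-name prefix is assumed here; adjust the pattern if `DataFile` is configured differently):
%% Cell type:code id: tags:
``` python
from extra_data import RunDirectory

corr_dc = RunDirectory(out_folder, include=f"CORR-R{run:04d}-*.h5")
print(corr_dc.all_sources)
```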
%% Cell type:code id: tags:
``` python
step_timer.start()
for source, (raw_image, corrected_images) in report[0].items():
    display(Markdown(f"# {source}"))

    display(Markdown("## The first raw image"))
    plot_camera_image(raw_image)
    plt.show()

    display(Markdown("## The first corrected image"))
    plot_camera_image(corrected_images[0])
    plt.show()

    display(Markdown("## The first corrected images in the trains (up to 20)"))
    plot_images(corrected_images, figsize=(13, 8))
    plt.show()

step_timer.done_step("Draw images")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......