Commit be89d563 authored by Philipp Schmidt
Fix unconditional access to timestamps for old data format version files

parent 456d224f
Merge request !752: [DataFile] Add support for older INDEX and METADATA versions
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 91 # run to process, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as a SLURM job; set to 0 to not run in parallel via SLURM
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Karabo ID of the control device; set to an empty string if it is the same as karabo_id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
relative_gain = True # do relative gain correction.
strixel_sensor = False # reordering for strixel detector layout.
strixel_double_norm = 2.0 # normalization to use for double-size pixels, only applied for strixel sensors.
limit_trains = 0 # ONLY FOR TESTING. Process only the first N trains; use 0 to process all.
chunks_ids = 32 # HDF chunk size for memoryCell and frameNumber.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage, integration_time, gain_setting, and gain_mode values
integration_time = 4.96 # integration time in us, will be overwritten by the value in the file unless manual_slow_data is True
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by the value in the file unless manual_slow_data is True
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixed gain mode, will be overwritten by the value in the file unless manual_slow_data is True
mem_cells = -1 # Set mem_cells to -1 to automatically use the value stored in RAW data.
bias_voltage = 180 # bias voltage in V, will be overwritten by the value in the file unless manual_slow_data is True
# Parameters for plotting
skip_plots = False # exit after writing corrected files
plot_trains = 500 # Number of trains to plot for RAW and CORRECTED plots. Set to -1 to automatically plot all trains.
cell_id_preview = 15 # cell Id used for preview in single-shot plots
# Parameters for ROI selection and reduction
roi_definitions = [-1] # List with groups of 6 values defining ROIs, e.g. [3, 120, 180, 200, 550, -2] for module 3 (JNGFR03), slice 120:180, 200:550, average along axis -2 (slow scan, or -1 for fast scan)
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
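%% Cell type:markdown id: tags:
As an aside, each region of interest in `roi_definitions` is packed as a group of six consecutive values. The sketch below, using hypothetical numbers, shows how such a group is interpreted (the actual reduction happens in `save_reduced_rois` further down):
%% Cell type:code id: tags:
``` python
# Minimal sketch with hypothetical values: each group of six entries in
# roi_definitions is (module_no, a1, a2, b1, b2, mean_axis).
example_rois = [3, 120, 180, 200, 550, -2]
for i in range(len(example_rois) // 6):
    module_no, a1, a2, b1, b2, mean_axis = example_rois[i*6 : (i+1)*6]
    print(f"Module {module_no}: slice [{a1}:{a2}, {b1}:{b2}], "
          f"average along axis {mean_axis}")
```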
%% Cell type:code id: tags:
``` python
import multiprocessing
import sys
import warnings
from functools import partial
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory, by_id, components
from extra_geom import JUNGFRAUGeometry
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.files import DataFile
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
map_seq_files,
write_compressed_frames,
CalibrationMetadata,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
run_dc = RunDirectory(run_folder)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
out_folder.mkdir(parents=True, exist_ok=True)
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
if any(axis_no not in {-2, -1, 2, 3} for axis_no in roi_definitions[5::6]):
print("ROI averaging must be on axis 2/3 (or equivalently -2/-1). "
f"Axis numbers given: {roi_definitions[5::6]}")
sys.exit(1)
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
run_folder, karabo_da, sequences)
if not len(mapped_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
for kda, sfiles in mapped_files.items():
for k, f in enumerate(sfiles):
if k == 0:
table.append((fi, kda, k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
if mem_cells < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in {mem_cells_name} mode.\nStorage cell start: {sc_start:02d}")
else:
memory_cells = mem_cells
mem_cells_name = "single cell" if memory_cells == 1 else "burst"
print(f"Run is in manually set to {mem_cells_name} mode. With {memory_cells} memory cells")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: {ctrl_data.run_settings})")
print(f"Gain mode is {gain_mode} ({ctrl_data.run_mode})")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:code id: tags:
``` python
strixel_transform = None
output_frame_shape = None
if strixel_sensor:
from cal_tools.jfalgs import strixel_transform, strixel_shape, strixel_double_pixels
output_frame_shape = strixel_shape()
Ydouble, Xdouble = strixel_double_pixels()
print('Strixel sensor transformation enabled')
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
empty_constants = {
"Offset": np.zeros((512, 1024, memory_cells, 3), dtype=np.float32),
"BadPixelsDark": np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
"RelativeGain": None,
"BadPixelsFF": None,
}
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook does not overwrite the calibration metadata file.
const_yaml = metadata.get("retrieved-constants", {})
def get_constants_for_module(karabo_da: str):
""" Get calibration constants for given module of Jungfrau
:return:
offset_map (offset map),
mask (mask of bad pixels),
gain_map (map of relative gain factors),
db_module (name of DB module),
when (dictionary: constant - creation time)
"""
when = dict()
const_data = dict()
if const_yaml:
for cname, mdata in const_yaml[karabo_da]["constants"].items():
const_data[cname] = dict()
when[cname] = mdata["creation-time"]
if when[cname]:
with h5py.File(mdata["file-path"], "r") as cf:
const_data[cname] = np.copy(
cf[f"{mdata['dataset-name']}/data"])
else:
const_data[cname] = empty_constants[cname]
else:
retrieval_function = partial(
get_constant_from_db_and_time,
karabo_id=karabo_id,
karabo_da=karabo_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
for cname, cempty in empty_constants.items():
const_data[cname], when[cname] = retrieval_function(
condition=condition,
constant=getattr(Constants.jungfrau, cname)(),
empty_constant=cempty,
)
offset_map = const_data["Offset"]
mask = const_data["BadPixelsDark"]
gain_map = const_data["RelativeGain"]
mask_ff = const_data["BadPixelsFF"]
# Combine masks
if mask_ff is not None:
mask |= np.moveaxis(mask_ff, 0, 1)
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
mask = np.moveaxis(mask, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
# masking double size pixels
mask[..., [255, 256], :, :] |= BadPixels.NON_STANDARD_SIZE
mask[..., [255, 256, 511, 512, 767, 768], :] |= BadPixels.NON_STANDARD_SIZE
if gain_map is not None:
if memory_cells > 1:
gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
# add extra empty cell constant
b = np.ones(((1,)+gain_map.shape[1:]))
gain_map = np.concatenate((gain_map, b), axis=0)
else:
gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)
return offset_map, mask, gain_map, karabo_da, when
with multiprocessing.Pool() as pool:
r = pool.map(get_constants_for_module, karabo_da)
# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
print(f'Constants for module {k_da}:')
for const in when:
print(f' {const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
relative_gain = False
constants[k_da] = (offset_map, mask, gain_map)
```
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
d = d.astype(np.float32) # [cells, x, y]
g = gain[index]
# Copy gain over first to keep it at the original 3 for low gain.
if strixel_transform is not None:
strixel_transform(g, out=gain_corr[index, ...])
else:
gain_corr[index, ...] = g
# Jungfrau gains 0[00], 1[01], 3[11]
# Change low gain to 2 for indexing purposes.
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though it is correct to assume that memory cells pattern
can be the same across all trains (for one correction run
taken with one acquisition), it is preferred to not assume
this to account for exceptions that can happen.
"""
m = memcells[index].copy()
        # A cell value of 255 marks frames without cell image data; such
        # frames are corrected with the constants of cell 0. This relies
        # on the modified cell values not being stored in the corrected data.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
mask_cell = mask[m, ...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
d -= offset
# Gain correction
if relative_gain:
if memory_cells > 1:
gain_map_cell = gain_map[m, ...]
else:
gain_map_cell = gain_map
cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
d /= cal
msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))
if strixel_transform is not None:
strixel_transform(d, out=data_corr[index, ...])
data_corr[index, :, Ydouble, Xdouble] /= strixel_double_norm
strixel_transform(msk, out=mask_corr[index, ...])
else:
data_corr[index, ...] = d
mask_corr[index, ...] = msk
```
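%% Cell type:markdown id: tags:
The offset, gain and mask lookups in `correct_train` all use the same `np.choose` pattern: the gain axis of the constant is moved to the front, and the per-pixel gain index then selects one of the three gain stages. A standalone toy example of that lookup (the array shapes are illustrative only, not the detector shape):
%% Cell type:code id: tags:
``` python
import numpy as np

# g holds a gain index (0, 1 or 2) per pixel; constants holds one value
# per pixel and gain stage in its last axis.
g = np.array([[0, 1], [2, 0]])
constants = np.arange(12, dtype=np.float32).reshape(2, 2, 3)  # [x, y, gain]
# Move the gain axis to the front so np.choose can pick along it.
selected = np.choose(g, np.moveaxis(constants, -1, 0))
print(selected)  # selected[i, j] == constants[i, j, g[i, j]]
```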
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
def save_reduced_rois(ofile, data_corr, mask_corr, karabo_da):
"""If ROIs are defined for this karabo_da, reduce them and save to the output file"""
rois_defined = 0
module_no = int(karabo_da[-2:])
params_source = f'{karabo_id}/ROIPROC/{karabo_da}'
rois_source = f'{params_source}:output'
for i in range(len(roi_definitions) // 6):
roi_module, a1, a2, b1, b2, mean_axis = roi_definitions[i*6 : (i+1)*6]
if roi_module == module_no:
rois_defined += 1
# Apply the mask and average remaining pixels to 1D
roi_data = data_corr[..., a1:a2, b1:b2].mean(
axis=mean_axis, where=(mask_corr[..., a1:a2, b1:b2] == 0)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(rois_source)
outp_source.create_key(f'data.roi{rois_defined}.data', data=roi_data)
ctrl_source = ofile.create_control_source(params_source)
ctrl_source.create_run_key('region', np.array([[a1, a2, b1, b2]]))
ctrl_source.create_run_key('reduce_axis', np.array([mean_axis]))
if rois_defined:
# Copy the index for the new source
# Create count/first datasets at INDEX source.
ofile.copy(f'INDEX/{karabo_id}/DET/{karabo_da}:daqOutput/data',
f'INDEX/{rois_source}')
ntrains = ofile['INDEX/trainId'].shape[0]
ctrl_source.create_index(ntrains)
```
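%% Cell type:markdown id: tags:
The ROI reduction above relies on the `where=` argument of `ndarray.mean` to exclude bad pixels (non-zero mask) from the average. A minimal sketch of the same pattern on toy arrays:
%% Cell type:code id: tags:
``` python
import numpy as np

data = np.array([[1.0, 2.0], [3.0, 100.0]])
mask = np.array([[0, 0], [0, 1]])  # non-zero marks a bad pixel
# Average along the last axis, ignoring masked entries.
roi_mean = data.mean(axis=-1, where=(mask == 0))
print(roi_mean)  # [1.5, 3.0] -- the bad 100.0 value is excluded
```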
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
# Loop over modules
for local_karabo_da, mapped_files_module in mapped_files.items():
instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
for sequence_file in mapped_files_module:
# Save corrected data in an output file with name
# of corresponding raw sequence file.
ofile_name = sequence_file.name.replace("RAW", "CORR")
out_file = out_folder / ofile_name
        # Load the sequence file data collection, the data.adc keydata,
        # the data shape (to later create arrays of the same shape),
        # and the number of available trains to correct.
seq_dc = H5File(sequence_file)
seq_dc_adc = seq_dc[instrument_src_kda, "data.adc"]
ishape = seq_dc_adc.shape # input shape.
corr_ntrains = ishape[0] # number of available trains to correct.
all_train_ids = seq_dc_adc.train_ids
        # Print a WARNING if this sequence file has no trains to correct.
        # Otherwise, report the number of trains with missing data.
if corr_ntrains == 0:
print(f"\t- WARNING: No trains to correct for {sequence_file.name}: "
f"Skipping the processing of this file.")
continue
elif len(all_train_ids) != corr_ntrains:
print(f"{sequence_file.name} has {len(seq_dc_adc.train_ids) - corr_ntrains} "
"trains with missing data.")
        # For testing: limit the number of corrected trains to get output faster.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
print(f"\nNumber of corrected trains are: {corr_ntrains} for {ofile_name}")
# Load constants from the constants dictionary.
# These arrays are used by `correct_train()` function
offset_map, mask, gain_map = constants[local_karabo_da]
# Determine total output shape.
if output_frame_shape is not None:
oshape = (*ishape[:-2], *output_frame_shape)
else:
oshape = ishape
# Allocate shared arrays for corrected data. Used in `correct_train()`
data_corr = context.alloc(shape=oshape, dtype=np.float32)
gain_corr = context.alloc(shape=oshape, dtype=np.uint8)
mask_corr = context.alloc(shape=oshape, dtype=np.uint32)
step_timer.start()
        # Re-select seq_dc after dropping empty trains and/or applying the train limit.
seq_dc = seq_dc.select(
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
        # Load raw images (adc), gain, memory cells, and frame numbers.
data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
frame_number = seq_dc[instrument_src_kda, "data.frameNumber"].ndarray()
# Validate that the selected cell id to preview is available in raw data.
if memory_cells > 1:
# For plotting, assuming that memory cells are sorted the same for all trains.
found_cells = memcells[0] == cell_id_preview
if any(found_cells):
cell_idx_preview = np.where(found_cells)[0][0]
else:
print(f"The selected cell_id_preview {cell_id_preview} is not available in burst mode. "
f"Previewing cell `{memcells[0]}`.")
cell_idx_preview = 0
else:
cell_idx_preview = 0
# Correct data per train
context.map(correct_train, data)
step_timer.done_step(f"Correction time.")
step_timer.start()
# Create CORR files and add corrected data sections.
sel_trains = np.isin(all_train_ids, seq_dc.train_ids)
image_counts = seq_dc[instrument_src_kda, "data.adc"].data_counts(labelled=False)
with DataFile(out_file, 'w') as outp_file:
            # Create INDEX datasets.
            outp_file.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create Instrument section to later add corrected datasets.
outp_source = outp_file.create_instrument_source(instrument_src_kda)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
            # RAW memoryCell and frameNumber are not corrected, but only
            # the values for the corrected trains are stored.
outp_source.create_key(
"data.memoryCell", data=memcells,
chunks=(min(chunks_ids, memcells.shape[0]), 1))
outp_source.create_key(
"data.frameNumber", data=frame_number,
chunks=(min(chunks_ids, frame_number.shape[0]), 1))
            # Add the main corrected `data.adc` dataset and store corrected data.
outp_source.create_key(
"data.adc", data=data_corr,
chunks=(min(chunks_data, data_corr.shape[0]), *oshape[1:]))
write_compressed_frames(
gain_corr, outp_file, f"{outp_source.name}/data/gain", comp_threads=8)
write_compressed_frames(
mask_corr, outp_file, f"{outp_source.name}/data/mask", comp_threads=8)
save_reduced_rois(outp_file, data_corr, mask_corr, local_karabo_da)
            # Create METADATA datasets.
outp_file.create_metadata(like=seq_dc)
        step_timer.done_step('Saving data time.')
```
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
# Positions are given in pixels
mod_width = (256 * 4) + (2 * 3) # inc. 2px gaps between tiles
mod_height = (256 * 2) + 2
if karabo_id == "SPB_IRDA_JF4M":
# The first 4 modules are rotated 180 degrees relative to the others.
# We pass the bottom, beam-right corner of the module regardless of its
# orientation, requiring a subtraction from the symmetric positions we'd
# otherwise calculate.
x_start, y_start = 1125, 1078
module_pos = [
(x_start - mod_width, y_start - mod_height - (i * (mod_height + 33)))
for i in range(4)
] + [
(-x_start, -y_start + (i * (mod_height + 33))) for i in range(4)
]
orientations = [(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif karabo_id == "FXE_XAD_JF1M":
    module_pos = ((-mod_width//2, 33), (-mod_width//2, -mod_height - 33))
orientations = [(-1,-1), (1,1)]
else:
module_pos = ((-mod_width//2,-mod_height//2),)
orientations = None
geom = JUNGFRAUGeometry.from_module_positions(module_pos, orientations=orientations, asic_gap=0)
```
%% Cell type:code id: tags:
``` python
first_seq = 0 if sequences == [-1] else sequences[0]
with RunDirectory(out_folder, f"*{run}*S{first_seq:05d}*") as corr_dc:
# Reading CORR data for plotting.
jf_corr = components.JUNGFRAU(
corr_dc,
detector_name=karabo_id,
).select_trains(np.s_[:plot_trains])
tid, jf_corr_data = next(iter(jf_corr.trains(require_all=True)))
# Shape = [modules, trains, cells, x, y]
    # TODO: Fix the case where not all modules were requested to be corrected.
    # For example, if only one module was corrected, an assertion error is
    # expected at `geom.plot_data_fast` while plotting corrected images.
corrected = jf_corr.get_array("data.adc")[:, :, cell_idx_preview, ...].values
corrected_train = jf_corr_data["data.adc"][
:, cell_idx_preview, ...
    ].values  # lose the train axis.
mask = jf_corr.get_array("data.mask")[:, :, cell_idx_preview, ...].values
mask_train = jf_corr_data["data.mask"][:, cell_idx_preview, ...].values
with RunDirectory(f"{in_folder}/r{run:04d}/", f"*S{first_seq:05d}*") as raw_dc:
# Reading RAW data for plotting.
jf_raw = components.JUNGFRAU(raw_dc, detector_name=karabo_id).select_trains(
np.s_[:plot_trains]
)
raw = jf_raw.get_array("data.adc")[:, :, cell_idx_preview, ...].values
raw_train = (
jf_raw.select_trains(by_id[[tid]])
.get_array("data.adc")[:, 0, cell_idx_preview, ...]
.values
)
gain = jf_raw.get_array("data.gain")[:, :, cell_idx_preview, ...].values
gain_train_cells = (
jf_raw.select_trains(by_id[[tid]]).get_array("data.gain")[:, :, :, ...].values
)
```
%% Cell type:code id: tags:
``` python
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time,
)
```
%% Cell type:markdown id: tags:
### Mean RAW Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {raw.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
raw_mean = np.mean(raw, axis=1)
geom.plot_data_fast(
raw_mean,
ax=ax,
vmin=min(0.75*np.median(raw_mean[raw_mean > 0]), 2000),
vmax=max(1.5*np.median(raw_mean[raw_mean > 0]), 16000),
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
ax.set_title(f'{karabo_id} - Mean RAW', size=18)
plt.show()
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview
%% Cell type:code id: tags:
``` python
print(f"The per pixel mean of the first {corrected.shape[1]} trains of the first sequence file")
fig, ax = plt.subplots(figsize=(18, 10))
corrected_mean = np.mean(corrected, axis=1)
_corrected_vmin = min(0.75*np.median(corrected_mean[corrected_mean > 0]), -0.5)
_corrected_vmax = max(2.*np.median(corrected_mean[corrected_mean > 0]), 100)
mean_plot_kwargs = dict(
vmin=_corrected_vmin, vmax=_corrected_vmax, cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
ax.imshow(corrected_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(figsize=(18, 10))
corrected_masked = corrected.copy()
corrected_masked[mask != 0] = np.nan
corrected_masked_mean = np.nanmean(corrected_masked, axis=1)
del corrected_masked
if not strixel_sensor:
geom.plot_data_fast(
corrected_masked_mean,
ax=ax,
colorbar={'shrink': 1, 'pad': 0.01},
**mean_plot_kwargs
)
else:
    ax.imshow(corrected_masked_mean.squeeze(), aspect=10, **mean_plot_kwargs)
ax.set_title(f'{karabo_id} - Mean CORRECTED with mask', size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### A single image from train {tid}")))
fig, ax = plt.subplots(figsize=(18, 10))
single_plot_kwargs = dict(
vmin=min(0.75 * np.median(corrected_train[corrected_train > 0]), -0.5),
vmax=max(2.0 * np.median(corrected_train[corrected_train > 0]), 100),
cmap="jet"
)
if not strixel_sensor:
geom.plot_data_fast(
corrected_train,
ax=ax,
colorbar={"shrink": 1, "pad": 0.01},
**single_plot_kwargs
)
else:
ax.imshow(corrected_train.squeeze(), aspect=10, **single_plot_kwargs)
ax.set_title(f"{karabo_id} - CORRECTED train: {tid}", size=18)
plt.show()
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [
np.min(edges[1]),
np.max(edges[1]),
np.min(edges[0]),
np.max(edges[0]),
]
im = ax.imshow(
data[::-1, :],
extent=extent,
aspect="auto",
norm=LogNorm(vmin=1, vmax=np.max(data))
)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:markdown id: tags:
### Gain Bit Value
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
h, ex, ey = np.histogram2d(
raw[i].flatten(),
gain[i].flatten(),
bins=[100, 4],
range=[[0, 10000], [0, 4]],
)
do_2d_plot(
h,
(ex, ey),
"Signal (ADU)",
"Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
f"Module {mod} ({pdu})",
)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for i, (pdu, mod) in enumerate(zip(db_modules, karabo_da)):
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
corrected_flatten = corrected[i].flatten()
for ax, hist_range in zip(axs, [(-100, 1000), (-1000, 10000)]):
h = ax.hist(
corrected_flatten,
bins=1000,
range=hist_range,
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview
%% Cell type:code id: tags:
``` python
display(Markdown((f"#### The per pixel maximum of train {tid} of the GAIN data")))
fig, ax = plt.subplots(figsize=(18, 10))
gain_max = np.max(gain_train_cells, axis=(1, 2))
geom.plot_data_fast(
gain_max,
ax=ax,
cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
plt.show()
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append(
        (item.name, f"{item.value:032b}"))
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
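%% Cell type:markdown id: tags:
Because each mask entry is a bit field, several defect types can be set at once and tested individually with a bitwise AND. A short sketch using the `BadPixels` enum imported above:
%% Cell type:code id: tags:
``` python
# Decode which defect types are set in a given mask value.
pixel_mask = BadPixels.NON_STANDARD_SIZE.value
set_types = [item.name for item in BadPixels if pixel_mask & item.value]
print(set_types)
```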
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
display(Markdown(f"#### Bad pixels image for train {tid}"))
fig, ax = plt.subplots(figsize=(18, 10))
if not strixel_sensor:
geom.plot_data_fast(
np.log2(mask_train),
ax=ax,
vmin=0, vmax=32, cmap="jet",
colorbar={'shrink': 1, 'pad': 0.01},
)
else:
ax.imshow(np.log2(mask_train).squeeze(), vmin=0, vmax=32, cmap='jet', aspect=10)
plt.show()
```
......
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Module indices to correct, use [-1] for all; only used when karabo_da is empty
karabo_da = [''] # Data aggregator names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input.
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = '' # Not needed, compatibility with current webservice.
cal_db_timeout = 0 # Not needed, compatibility with current webservice.
cal_db_root = '/gpfs/exfel/d/cal/caldb_store'
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
# Correction parameters
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.
# Output options
overwrite = True # set to True if existing data should be overwritten
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.
create_virtual_cxi_in = '' # Folder to create virtual CXI files in (for each sequence).
# Parallelization options
sequences_per_node = 1 # Sequence files to process per node
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.
num_threads_per_worker = 32 # Number of threads per worker.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
from pathlib import Path
from time import perf_counter
import gc
import re
import warnings
import numpy as np
import h5py
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh
from extra_data.components import LPD1M
from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.tools import (
CalibrationMetadata,
calcat_creation_time,
write_compressed_frames,
)
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
``` python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$') # This should probably move to cal_tools
run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
output_source = output_source or input_source
cal_db_root = Path(cal_db_root)
metadata = CalibrationMetadata(metadata_folder or out_folder)
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
if not modules or modules == [-1]:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
do_sequence = lambda seq: True
else:
do_sequence = [int(x) for x in sequences].__contains__
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
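%% Cell type:markdown id: tags:
For reference, the selection logic above can be exercised on a sample file name (the name below is hypothetical):
%% Cell type:code id: tags:
``` python
# Sketch: how file_re and do_sequence select files.
sample_stem = 'RAW-R0010-LPD00-S00002'
m = file_re.match(sample_stem)
if m is not None:
    print('aggregator selected:', m[2] in karabo_da)
    print('sequence selected:', do_sequence(int(m[3])))
```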
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
``` python
data_to_process = []
for inp_path in run_folder.glob('RAW-*.h5'):
match = file_re.match(inp_path.stem)
if match[2] not in karabo_da or not do_sequence(int(match[3])):
continue
outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
run=int(match[1]), aggregator=match[2], seq=int(match[3]))
data_to_process.append((match[2], inp_path, outp_path))
print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[0]}{x[1]}'):
print(f'{data_descr[0]}\t{data_descr[1]}')
```
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
const_yaml = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)
if const_yaml: # Read constants from YAML file.
start = perf_counter()
for da, ccvs in const_yaml.items():
for calibration_name, ccv in ccvs['constants'].items():
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['file-path']),
dataset=ccv['dataset-name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
else: # Retrieve constants from CALCAT.
dark_calibrations = {
1: 'Offset', # np.float32
14: 'BadPixelsDark' # should be np.uint32, but is np.float64
}
dark_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
illuminated_calibrations = {
20: 'BadPixelsFF', # np.uint32
42: 'GainAmpMap', # np.float32
43: 'FFMap', # np.float32
44: 'RelativeGain' # np.float32
}
illuminated_condition = dark_condition.copy()
illuminated_condition += [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da='', event_at=creation_time.isoformat(), snapshot_at=None)
if not resp['success']:
raise RuntimeError(resp)
for ccv in resp['data']:
cc = ccv['calibration_constant']
da = ccv['physical_detector_unit']['karabo_da']
calibration_name = calibrations[cc['calibration_id']]
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['path_to_file']) / ccv['file_name'],
dataset=ccv['data_set_name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
print('.', end='', flush=True)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
def load_constant_dataset(wid, index, const_descr):
ccv_entry = const_data[const_descr]
with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])
print('.', end='', flush=True)
print('Loading calibration data', end='', flush=True)
start = perf_counter()
const_load_mp.map(load_constant_dataset, list(const_data.keys()))
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
# These are intended to be in the order (cell, X, Y, gain).
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}
ccv_shape = (mem_cells, 256, 256, 3)
constant_order = {
'Offset': (2, 1, 0, 3),
'BadPixelsDark': (2, 1, 0, 3),
'RelativeGain': (2, 1, 0, 3),
'FFMap': (2, 0, 1, 3),
'BadPixelsFF': (2, 0, 1, 3),
'GainAmpMap': (2, 0, 1, 3),
}
def prepare_constants(wid, index, aggregator):
consts = {calibration_name: entry['data']
for (aggregator_, calibration_name), entry
in const_data.items()
if aggregator == aggregator_}
def _prepare_data(calibration_name, dtype):
return consts[calibration_name] \
.transpose(constant_order[calibration_name]) \
.astype(dtype, copy=True) # Make sure array is contiguous.
if offset_corr and 'Offset' in consts:
ccv_offsets[aggregator] = _prepare_data('Offset', np.float32)
else:
ccv_offsets[aggregator] = np.zeros(ccv_shape, dtype=np.float32)
ccv_gains[aggregator] = np.ones(ccv_shape, dtype=np.float32)
if 'BadPixelsDark' in consts:
ccv_masks[aggregator] = _prepare_data('BadPixelsDark', np.uint32)
else:
ccv_masks[aggregator] = np.zeros(ccv_shape, dtype=np.uint32)
if rel_gain and 'RelativeGain' in consts:
ccv_gains[aggregator] *= _prepare_data('RelativeGain', np.float32)
if ff_map and 'FFMap' in consts:
ccv_gains[aggregator] *= _prepare_data('FFMap', np.float32)
if 'BadPixelsFF' in consts:
np.bitwise_or(ccv_masks[aggregator], _prepare_data('BadPixelsFF', np.uint32),
out=ccv_masks[aggregator])
if gain_amp_map and 'GainAmpMap' in consts:
ccv_gains[aggregator] *= _prepare_data('GainAmpMap', np.float32)
print('.', end='', flush=True)
print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
const_data.clear() # Clear raw constants data now to save memory.
gc.collect();
```
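%% Cell type:markdown id: tags:
For clarity, the `constant_order` transposition in `_prepare_data` moves the memory-cell axis to the front so the correction kernel can index constants as (cell, X, Y, gain). Below is a toy example of the permutation; the shape mirrors the LPD constants, while the exact semantics of the pixel axes are an assumption here:
%% Cell type:code id: tags:
``` python
import numpy as np

# A constant stored with the memory-cell axis third is transposed so
# that axis comes first, as done for the Offset constant above.
stored = np.zeros((256, 256, 512, 3), dtype=np.float32)
prepared = stored.transpose((2, 1, 0, 3))
print(prepared.shape)  # (512, 256, 256, 3), i.e. cell axis first
```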
%% Cell type:code id: tags:
``` python
def correct_file(wid, index, work):
aggregator, inp_path, outp_path = work
module_index = int(aggregator[-2:])
start = perf_counter()
dc = xd.H5File(inp_path, inc_suspect_trains=False).select('*', 'image.*', require_all=True)
inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
open_time = perf_counter() - start
# Load raw data for this file.
# Reshaping gets rid of the extra 1-len dimensions without
# mangling the frame axis for an actual frame count of 1.
start = perf_counter()
in_raw = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
read_time = perf_counter() - start
# Allocate output arrays.
out_data = np.zeros((in_raw.shape[0], 256, 256), dtype=np.float32)
out_gain = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint8)
out_mask = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint32)
start = perf_counter()
correct_lpd_frames(in_raw, in_cell,
out_data, out_gain, out_mask,
ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
num_threads=num_threads_per_worker)
correct_time = perf_counter() - start
image_counts = inp_source['image.data'].data_counts(labelled=False)
start = perf_counter()
if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
fa = dc.files[0]
sel_trains = np.isin(fa.train_ids, dc.train_ids)
outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)
with DataFile(outp_path, 'w') as outp_file:
            outp_file.create_index(dc.train_ids, from_file=dc.files[0])
outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))
outp_source = outp_file.create_instrument_source(outp_source_name)
outp_source.create_index(image=image_counts)
outp_source.create_key('image.cellId', data=in_cell,
chunks=(min(chunks_ids, in_cell.shape[0]),))
outp_source.create_key('image.pulseId', data=in_pulse,
chunks=(min(chunks_ids, in_pulse.shape[0]),))
outp_source.create_key('image.data', data=out_data,
chunks=(min(chunks_data, out_data.shape[0]), 256, 256))
write_compressed_frames(
out_gain, outp_file, f'INSTRUMENT/{outp_source_name}/image/gain', comp_threads=8)
write_compressed_frames(
out_mask, outp_file, f'INSTRUMENT/{outp_source_name}/image/mask', comp_threads=8)
write_time = perf_counter() - start
total_time = open_time + read_time + correct_time + write_time
frame_rate = in_raw.shape[0] / total_time
print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
in_raw.shape[0], frame_rate))
in_raw = None
in_cell = None
in_pulse = None
out_data = None
out_gain = None
out_mask = None
gc.collect()
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
```
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
``` python
geom = xg.LPD_1MGeometry.from_quad_positions(
[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])
output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])
det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
``` python
left_edge_ratio = 0.01
right_edge_ratio = 0.99
fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=2000, range=(-1500, 2000))
def find_nearest_index(array, value):
return (np.abs(array - value)).argmin()
cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]
max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
color='red', rotation=90, ha='left', va='center', size='x-large')
ax.set_xlim(vmin-(vmax-vmin)*0.1, vmax+(vmax-vmin)*0.1)
ax.set_ylim(0, max_value*1.1)
pass
```
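%% Cell type:markdown id: tags:
The cumulative-histogram lookup above is essentially a percentile computation; numpy can produce a comparable interval directly, up to the histogram binning and clipping range:
%% Cell type:code id: tags:
``` python
# The vmin/vmax above approximate quantiles of the intensity distribution.
flat = np.ravel(data.data)
vmin_q, vmax_q = np.quantile(flat, [left_edge_ratio, right_edge_ratio])
print(f'vmin ~ {vmin_q:.1f}, vmax ~ {vmax_q:.1f}')
```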
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
``` python
# The numerically highest gain-bit value per pixel corresponds to the lowest gain stage.
lowest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))
fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(lowest_gain_stage, ax=ax, vmin=0, vmax=2);
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
%% Cell type:markdown id: tags:
### Create virtual CXI file
%% Cell type:code id: tags:
``` python
if create_virtual_cxi_in:
vcxi_folder = Path(create_virtual_cxi_in.format(
run=run, proposal_folder=str(Path(in_folder).parent)))
vcxi_folder.mkdir(parents=True, exist_ok=True)
    # Group output files by sequence number.
    output_by_seq = {}
    for outp_path in output_paths:
        output_by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
for seq_number, seq_output_paths in output_by_seq.items():
# Create data collection and detector components only for this sequence.
try:
det = LPD1M(xd.DataCollection.from_paths(seq_output_paths), detector_name=karabo_id, min_modules=4)
except ValueError: # Couldn't find enough data for min_modules
continue
det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')
```
......