Skip to content
Snippets Groups Projects

[AGIPD][CORRECT] Clean up before AGIPD calcalt_interface changes

Merged Karim Ahmed requested to merge fix/small_agipd_corr_improvements into master
1 unresolved thread
2 files
+ 113
139
Compare changes
  • Side-by-side
  • Inline
Files
2
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 225 # runs to process, required
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook
use_dir_creation_date = True # use the creation data of the input dir for database queries
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
mem_cells = 0 # Number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
mem_cells = -1 # Number of memory cells used, set to 0 to automatically infer
bias_voltage = -1 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = -1. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
mem_cells_db = -1 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for fraction of empty pixels to consider module enough dark to perform CM correction
cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask complete adc
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
photon_energy = -1.0 # photon energy in keV, non-positive value for XGM autodetection
# Correction Booleans
only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
round_photons = False # Round to absolute number of photons, only use with gain corrections
# Optional auxiliary devices
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
use_super_selection = 'cm' # Make a common selection for entire run: 'off' - disable, 'final' - enable for final selection, 'cm' - enable only for common mode correction
use_xgm_device = '' # DoocsXGM device ID to obtain actual photon energy, operating condition else.
# Output parameters
recast_image_data = '' # Cast data to a different dtype before saving
compress_fields = ['gain', 'mask'] # Datasets in image group to compress.
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
max_tasks_per_worker = 1 # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
import itertools
import os
import math
import multiprocessing
import re
import traceback
import warnings
from datetime import timedelta
from logging import warn
from logging import warning
from pathlib import Path
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import H5File, RunDirectory, stack_detector_data, by_id
from extra_data import RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools
import seaborn as sns
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.step_timing import StepTimer
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
from cal_tools.tools import (
CalibrationMetadata,
calcat_creation_time,
map_modules_from_folder,
module_index_to_qm,
)
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependability for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Dont apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
corr_bools["round_photons"] = round_photons
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules:", ', '.join(cal_tools.tools.module_index_to_qm(x) for x in modules))
print("Process modules:", ', '.join(module_index_to_qm(x) for x in modules))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
if use_ppu_device:
# Obtain trains to process if using a pulse picker device.
# Will throw an uncaught exception if the device is wrong.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
train_ids = np.unique(seq_start)[1:] + ppu_train_offset
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# Process all trains.
train_ids = None
print(f'Processing all valid trains')
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = cal_tools.tools.map_modules_from_folder(
mapped_files, _, total_sequences, _, _ = map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
mod_channel = int(re.findall(rf".*{first_mod_channel}CH([0-9]+):.*", instrument_src_mod)[0])
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Creation time: {creation_time}")
if acq_rate == 0.:
if acq_rate == -1.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == 0.:
if mem_cells == -1:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
if bias_voltage == -1:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
raise ValueError(f"No raw images found for {instrument_src_mod}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
mem_cells_db = mem_cells if mem_cells_db == -1 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: 9.2")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
warn(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
warning(f"{to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_finder != 'off':
from extra_redu import make_litframe_finder, LitFrameFinderError
if use_litframe_finder not in ['auto', 'offline', 'online']:
raise ValueError("Unexpected value in 'use_litframe_finder'.")
inst = karabo_id_control[:3]
litfrm = make_litframe_finder(inst, dc, litframe_device_id)
try:
get_data = {'auto': litfrm.read_or_process, 'offline': litfrm.process, 'online': litfrm.read}
r = get_data[use_litframe_finder]()
cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold, use_super_selection)
cell_sel.print_report()
except LitFrameFinderError as err:
warn(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
warning(f"Cannot use AgipdLitFrameFinder due to:\n{err}")
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:code id: tags:
``` python
if round_photons and photon_energy <= 0.0:
if use_xgm_device:
# Try to obtain photon energy from XGM device.
wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']
try:
from scipy.constants import h, c, e
# Read wavelength as a single value and convert to hv.
photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
print(f'Obtained photon energy {photon_energy:.3f} keV from {use_xgm_device}')
except ValueError:
warn('XGM source available but photon energy varies greater than 1%, '
warning('XGM source available but photon energy varies greater than 1%, '
'photon rounding disabled!')
round_photons = False
else:
warn('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
warning('Neither explicit photon energy nor XGM device configured, photon rounding disabled!')
round_photons = False
elif round_photons:
print(f'Photon energy for rounding: {photon_energy:.3f} keV')
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
agipd_corr.photon_energy = photon_energy
agipd_corr.compress_fields = compress_fields
if recast_image_data:
agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
```
%% Cell type:code id: tags:
``` python
module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = cal_tools.tools.CalibrationMetadata(metadata_folder or out_folder)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
err = ""
k_da = module_index_to_karabo_da[mod]
try:
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
print(f"Found constants for {k_da} in calibration_metadata.yml")
else:
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
print(f"Found constants for {k_da} in calibration_metadata.yml")
else:
try:
# TODO: replace with proper retrieval (as done in pre-correction)
when = agipd_corr.initialize_from_db(
karabo_id=karabo_id,
karabo_da=k_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
memory_cells=mem_cells_db,
bias_voltage=bias_voltage,
photon_energy=9.2,
gain_setting=gain_setting,
acquisition_rate=acq_rate,
integration_time=integration_time,
module_idx=mod,
only_dark=False,
)
print(f"Queried CalCat for {k_da}")
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, k_da
except Exception as e:
warning(f"Module: {k_da}, {e}")
when = None
return mod, when, k_da
print(f'Preparing constants (FF: {agipd_corr.corr_bools.get("xray_corr", False)}, PC: {any(agipd_corr.pc_bools)}, '
f'BLC: {any(agipd_corr.blc_bools)})')
ts = perf_counter()
with multiprocessing.Pool(processes=len(modules)) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
if max_tasks_per_worker == -1:
max_tasks_per_worker = None
with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
step_timer.done_step(f'Loading data from files')
if img_counts == 0:
# Skip any further processing and output if there are no images to
# correct in this file.
continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# In common mode corrected is enabled.
# Cell selection is only activated after common mode correction.
# Perform cross-file correction parallel over asics
image_files_idx = [i_proc for i_proc, n_img in enumerate(img_counts) if n_img > 0]
pool.starmap(agipd_corr.cm_correction, itertools.product(
image_files_idx, range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, image_files_idx)
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise correction"
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# if the yml file contains "retrieved-constants", that means a leading
# notebook got processed and the reporting would be generated from it.
fst_print = True
timestamps = {}
for i, (error, modno, when, k_da) in enumerate(const_out):
qm = cal_tools.tools.module_index_to_qm(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
for i, (modno, when, k_da) in enumerate(const_out):
qm = module_index_to_qm(modno)
if k_da not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
module_timestamps = {}
# If correction is crashed
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store few time stamps if exists
# Add NA to keep array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
module_timestamps[key] = when[key]
else:
if error is not None:
module_timestamps[key] = "Err"
else:
module_timestamps[key] = "NA"
module_timestamps[key] = "NA"
timestamps[qm] = module_timestamps
seq = sequences[0] if sequences else 0
if timestamps:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
"""Load single train for all module
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
run_data = RunDirectory(data_folder, include)
if tid is not None:
tid, data = run_data.select(
f'{detector_id}/DET/*', source).train_from_id(tid, keep_dims=True)
else:
# A first full trainId for all available modules is of interest.
tid, data = next(run_data.select(
f'{detector_id}/DET/*', source).trains(require_all=True, keep_dims=True))
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules)
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows plots signal vs. digitized gain
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated base-line shift with respect to the total ADU counts of corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = np.int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = np.int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:code id: tags:
``` python
if round_photons:
display(Markdown('### Photonization histograms ###'))
x_preround = (agipd_corr.hist_bins_preround[1:] + agipd_corr.hist_bins_preround[:-1]) / 2
x_postround = (agipd_corr.hist_bins_postround[1:] + agipd_corr.hist_bins_postround[:-1]) / 2
x_photons = np.arange(0, (x_postround[-1] + 1) / photon_energy)
fig, ax = plt.subplots(ncols=1, nrows=1, clear=True)
ax.plot(x_preround, agipd_corr.shared_hist_preround, '.-', color='C0')
ax.bar(x_postround, agipd_corr.shared_hist_postround, photon_energy, color='C1', alpha=0.5)
ax.set_yscale('log')
ax.set_ylim(0, max(agipd_corr.shared_hist_preround.max(), agipd_corr.shared_hist_postround.max())*3)
ax.set_xlim(x_postround[0], x_postround[-1]+1)
ax.set_xlabel('Photon energy / keV')
ax.set_ylabel('Intensity')
ax.vlines(x_photons * photon_energy, *ax.get_ylim(), color='k', linestyle='dashed')
phx = ax.twiny()
phx.set_xlim(x_postround[0] / photon_energy, (x_postround[-1]+1)/photon_energy)
phx.set_xticks(x_photons)
phx.set_xlabel('# Photons')
pass
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
Loading